[FFmpeg-devel] [PATCH v2 12/14] vvcdec: add CTU parser

Nuo Mi nuomi2021 at gmail.com
Fri Jul 7 17:05:38 EEST 2023


---
 libavcodec/vvc/vvc_ctu.c | 2378 +++++++++++++++++++++++++++++++++++++-
 libavcodec/vvc/vvc_ctu.h |   11 +
 2 files changed, 2384 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vvc/vvc_ctu.c b/libavcodec/vvc/vvc_ctu.c
index d46a522a0d..4b86925ff7 100644
--- a/libavcodec/vvc/vvc_ctu.c
+++ b/libavcodec/vvc/vvc_ctu.c
@@ -22,8 +22,2359 @@
 
 #include "vvc_cabac.h"
 #include "vvc_ctu.h"
+#include "vvc_inter.h"
 #include "vvc_mvs.h"
 
+#define PROF_TEMP_SIZE ((PROF_BLOCK_SIZE) * sizeof(int16_t)) // byte size of PROF_BLOCK_SIZE int16_t entries; parenthesized so it is safe inside larger expressions
+// TAB_MSM / TAB_ISPMF: index tables kept per 32x32 and per 64x64 unit by sample position (x, y).
+#define TAB_MSM(fc, depth, x, y) (fc)->tab.msm[(depth)][((y) >> 5) * (fc)->ps.pps->width32 + ((x) >> 5)]
+#define TAB_ISPMF(fc, x, y) (fc)->tab.ispmf[((y) >> 6) * (fc)->ps.pps->width64 + ((x) >> 6)]
+
+typedef enum VVCModeType { // mode-type argument of can_split()
+    MODE_TYPE_ALL,
+    MODE_TYPE_INTER, // can_split() clears the binary splits for area 32 and the ternary splits for area 64
+    MODE_TYPE_INTRA, // can_split() clears every split for a DUAL_TREE_CHROMA block
+} VVCModeType;
+
+static void set_tb_pos(const VVCFrameContext *fc, const TransformBlock *tb)
+{ // Stamp tb's origin and size into each min-TU cell it covers; c_idx 0 uses table set 0, any other c_idx set 1.
+    const int c      = tb->c_idx;
+    const int plane  = c != 0; // which of the two table sets to write
+    const int stride = fc->ps.pps->min_tu_width;
+    const int cols   = FFMAX(1, tb->tb_width  >> (MIN_TU_LOG2 - fc->ps.sps->hshift[c]));
+    const int rows   = FFMAX(1, tb->tb_height >> (MIN_TU_LOG2 - fc->ps.sps->vshift[c]));
+    const int first  = (tb->y0 >> MIN_TU_LOG2) * stride + (tb->x0 >> MIN_TU_LOG2);
+
+    for (int r = 0; r < rows; r++) {
+        const int base = first + r * stride; // first cell of this row
+
+        memset(&fc->tab.tb_width [plane][base], tb->tb_width,  cols);
+        memset(&fc->tab.tb_height[plane][base], tb->tb_height, cols);
+        for (int k = base; k < base + cols; k++) {
+            fc->tab.tb_pos_x0[plane][k] = tb->x0;
+            fc->tab.tb_pos_y0[plane][k] = tb->y0;
+        }
+    }
+}
+
+static void set_tb_tab(uint8_t *tab, uint8_t v, const VVCFrameContext *fc,
+    const TransformBlock *tb)
+{ // Write v into every min-TU cell of tab covered by tb; the block size is first scaled up by the plane's shifts.
+    const int c      = tb->c_idx;
+    const int span_x = tb->tb_width  << fc->ps.sps->hshift[c];
+    const int span_y = tb->tb_height << fc->ps.sps->vshift[c];
+    const int cells  = FFMAX(1, span_x >> MIN_TU_LOG2); // at least one cell per row
+    const int x_cell = tb->x0 >> MIN_TU_LOG2;
+    const int stride = fc->ps.pps->min_tu_width;
+
+    for (int dy = 0; dy < span_y; dy += MIN_TU_SIZE)
+        memset(tab + (((tb->y0 + dy) >> MIN_TU_LOG2) * stride + x_cell), v, cells);
+}
+
+// 8.7.1 Derivation process for quantization parameters
+static int get_qp_y_pred(const VVCLocalContext *lc)
+{ // Returns the predicted luma QP for the group whose top-left is lc->parse.cu_qg_top_left_{x,y}.
+    const VVCFrameContext *fc     = lc->fc;
+    const VVCSPS *sps       = fc->ps.sps;
+    const VVCPPS *pps       = fc->ps.pps;
+    const CodingUnit *cu    = lc->cu;
+    const int ctb_log2_size = sps->ctb_log2_size_y;
+    const int ctb_size_mask = (1 << ctb_log2_size) - 1;
+    const int xQg           = lc->parse.cu_qg_top_left_x;
+    const int yQg           = lc->parse.cu_qg_top_left_y;
+    const int min_cb_width  = pps->min_cb_width;
+    const int x_cb          = cu->x0 >> sps->min_cb_log2_size_y;
+    const int y_cb          = cu->y0 >> sps->min_cb_log2_size_y;
+    const int x_ctb         = cu->x0 >> ctb_log2_size;
+    const int y_ctb         = cu->y0 >> ctb_log2_size;
+    const int in_same_ctb_a = ((xQg - 1) >> ctb_log2_size) == x_ctb && (yQg >> ctb_log2_size) == y_ctb; // left neighbour lies in the CU's CTB
+    const int in_same_ctb_b = (xQg >> ctb_log2_size) == x_ctb && ((yQg - 1) >> ctb_log2_size) == y_ctb; // above neighbour lies in the CU's CTB
+    int qPy_pred, qPy_a, qPy_b;
+
+    if (lc->na.cand_up) {
+        const int first_qg_in_ctu = !(xQg & ctb_size_mask) &&  !(yQg & ctb_size_mask);
+        const int qPy_up          = fc->tab.qp[LUMA][x_cb + (y_cb - 1) * min_cb_width];
+        if (first_qg_in_ctu && pps->ctb_to_col_bd[xQg >> ctb_log2_size] == xQg) // group starts a CTB sitting on a ctb_to_col_bd boundary
+            return qPy_up;
+    }
+
+    // qPy_pred
+    qPy_pred = lc->ep->is_first_qg ? lc->sc->sh.slice_qp_y : lc->ep->qp_y;
+
+    // qPy_b
+    if (!lc->na.cand_up || !in_same_ctb_b)
+        qPy_b = qPy_pred;
+    else
+        qPy_b = fc->tab.qp[LUMA][x_cb + (y_cb - 1) * min_cb_width];
+
+    // qPy_a
+    if (!lc->na.cand_left || !in_same_ctb_a)
+        qPy_a = qPy_pred;
+    else
+        qPy_a = fc->tab.qp[LUMA][(x_cb - 1) + y_cb * min_cb_width];
+    // 63 is itself a legal luma QP (set_qp_y() wraps into [-qp_bd_offset, 63]), so the bound is inclusive
+    av_assert2(qPy_a >= -fc->ps.sps->qp_bd_offset && qPy_a <= 63);
+    av_assert2(qPy_b >= -fc->ps.sps->qp_bd_offset && qPy_b <= 63);
+
+    return (qPy_a + qPy_b + 1) >> 1; // rounded average of the two neighbour values
+}
+
+static void set_cb_tab(const VVCLocalContext *lc, uint8_t *tab, const uint8_t v)
+{ // Fill the min-CB cells covered by the current coding unit (lc->cu) in tab with v.
+    const VVCFrameContext *fc = lc->fc;
+    const CodingUnit *cu      = lc->cu;
+    const int shift           = fc->ps.sps->min_cb_log2_size_y;
+    const int stride          = fc->ps.pps->min_cb_width;
+
+    // extent of the coding unit, counted in min-CB cells
+    const int cols            = cu->cb_width  >> shift;
+    const int rows            = cu->cb_height >> shift;
+
+    // index of the coding unit's top-left cell
+    int idx                   = (cu->y0 >> shift) * stride + (cu->x0 >> shift);
+
+    for (int r = 0; r < rows; r++) {
+        memset(tab + idx, v, cols);
+        idx += stride;
+    }
+}
+
+static int set_qp_y(VVCLocalContext *lc, const int x0, const int y0, const int has_qp_delta)
+{ // Update ep->qp_y for the block at (x0, y0), parsing cu_qp_delta when has_qp_delta is set; returns 0 or AVERROR_INVALIDDATA.
+    const VVCSPS *sps   = lc->fc->ps.sps;
+    EntryPoint *ep      = lc->ep;
+    CodingUnit *cu      = lc->cu;
+    int cu_qp_delta     = 0;
+
+    if (!lc->fc->ps.pps->cu_qp_delta_enabled_flag) {
+        ep->qp_y = lc->sc->sh.slice_qp_y; // no per-CU deltas: the slice QP is used as is
+    } else if (ep->is_first_qg || (lc->parse.cu_qg_top_left_x == x0 && lc->parse.cu_qg_top_left_y == y0)) {
+        ep->qp_y = get_qp_y_pred(lc); // first group, or (x0, y0) is the recorded cu_qg_top_left position: restart from the prediction
+        ep->is_first_qg = 0;
+    }
+
+    if (has_qp_delta) {
+        const int cu_qp_delta_abs = ff_vvc_cu_qp_delta_abs(lc);
+
+        if (cu_qp_delta_abs) // the sign flag is only parsed for a non-zero magnitude
+            cu_qp_delta = ff_vvc_cu_qp_delta_sign_flag(lc) ? -cu_qp_delta_abs : cu_qp_delta_abs;
+        if (cu_qp_delta > (31 + sps->qp_bd_offset / 2) || cu_qp_delta < -(32 + sps->qp_bd_offset / 2))
+            return AVERROR_INVALIDDATA; // delta outside the accepted range
+        lc->parse.is_cu_qp_delta_coded = 1;
+
+        if (cu_qp_delta) {
+            int off = sps->qp_bd_offset;
+            ep->qp_y = FFUMOD(ep->qp_y + cu_qp_delta + 64 + 2 * off, 64 + off) - off; // NOTE(review): assuming FFUMOD is a non-negative modulo, this wraps into [-off, 63]
+        }
+    }
+
+    set_cb_tab(lc, lc->fc->tab.qp[LUMA], ep->qp_y); // record the QP for every min-CB cell of the current CU
+    cu->qp[LUMA] = ep->qp_y;
+
+    return 0;
+}
+
+static void set_qp_c_tab(const VVCLocalContext *lc, const TransformUnit *tu, const TransformBlock *tb)
+{ // Store the CU's QP for tb's component in the QP table; the JCBCR entry is used when the joint flag and both chroma coded flags are set.
+    int src = tb->c_idx;
+    if (tu->joint_cbcr_residual_flag && tu->coded_flag[CB] && tu->coded_flag[CR])
+        src = JCBCR;
+    set_tb_tab(lc->fc->tab.qp[tb->c_idx], lc->cu->qp[src], lc->fc, tb);
+}
+
+static void set_qp_c(VVCLocalContext *lc)
+{ // Derive cu->qp[] for CB, CR and, when joint_cbcr_enabled_flag is set, the entry after CR, from the luma QP.
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps         = fc->ps.sps;
+    const VVCPPS *pps         = fc->ps.pps;
+    const VVCSH *sh           = &lc->sc->sh;
+    CodingUnit *cu            = lc->cu;
+    const int bd_off          = sps->qp_bd_offset;
+    int luma_qp, idx;
+
+    if (cu->tree_type == SINGLE_TREE)
+        luma_qp = lc->ep->qp_y;
+    else // other tree types: look the luma QP up at the centre of the coding unit
+        luma_qp = ff_vvc_get_qPy(fc, cu->x0 + cu->cb_width / 2, cu->y0 + cu->cb_height / 2);
+    idx = av_clip(luma_qp + bd_off, 0, MAX_QP + bd_off); // row index into the chroma QP mapping tables
+    for (int c = CB; c <= CR + sps->joint_cbcr_enabled_flag; c++) {
+        const int qp = sps->chroma_qp_table[c - 1][idx] + pps->chroma_qp_offset[c - 1] +
+                       sh->chroma_qp_offset[c - 1] + lc->parse.chroma_qp_offset[c - 1];
+        cu->qp[c] = av_clip(qp, -bd_off, MAX_QP) + bd_off;
+    }
+}
+
+static TransformUnit* alloc_tu(VVCFrameContext *fc, CodingUnit *cu)
+{ // Take a TransformUnit from fc->tu_pool and append it to cu's TU list; returns NULL when the pool yields no buffer.
+    AVBufferRef *ref = av_buffer_pool_get(fc->tu_pool);
+    TransformUnit *unit;
+
+    if (!ref)
+        return NULL;
+
+    unit       = (TransformUnit *)ref->data;
+    unit->buf  = ref; // the unit remembers the buffer it lives in
+    unit->next = NULL;
+
+    if (!cu->tus.tail)
+        cu->tus.head = unit; // list was empty
+    else
+        cu->tus.tail->next = unit;
+    cu->tus.tail = unit;
+    return unit;
+}
+
+static TransformUnit* add_tu(VVCFrameContext *fc, CodingUnit *cu, const int x0, const int y0, const int tu_width, const int tu_height)
+{ // Append a new TU at (x0, y0) of the given size to cu, with no blocks and no coded flags; NULL if alloc_tu() fails.
+    TransformUnit *unit = alloc_tu(fc, cu);
+
+    if (unit) {
+        // geometry
+        unit->x0     = x0;
+        unit->y0     = y0;
+        unit->width  = tu_width;
+        unit->height = tu_height;
+        // nothing parsed yet
+        unit->nb_tbs = 0;
+        unit->joint_cbcr_residual_flag = 0;
+        memset(unit->coded_flag, 0, sizeof(unit->coded_flag));
+    }
+    return unit;
+}
+
+static TransformBlock* add_tb(TransformUnit *tu, VVCLocalContext *lc,
+    const int x0, const int y0, const int tb_width, const int tb_height, const int c_idx)
+{ // Append a transform block for component c_idx to tu, reserve its coefficients from lc->coeffs, and return it.
+    TransformBlock *tb;
+
+    tb = &tu->tbs[tu->nb_tbs++]; // caller must not add more blocks than tu->tbs can hold
+    tb->has_coeffs = 0;
+    tb->x0 = x0;
+    tb->y0 = y0;
+    tb->tb_width  = tb_width;
+    tb->tb_height = tb_height;
+    tb->log2_tb_width  = av_log2(tb_width);  // integer log2: no floating point and no <math.h> dependency
+    tb->log2_tb_height = av_log2(tb_height);
+
+    tb->max_scan_x = tb->max_scan_y = 0;
+    tb->min_scan_x = tb->min_scan_y = 0;
+
+    tb->c_idx = c_idx;
+    tb->ts = 0;
+    tb->coeffs = lc->coeffs;
+    lc->coeffs += tb_width * tb_height; // advance the shared cursor past this block's coefficients
+    return tb;
+}
+
+static uint8_t tu_y_coded_flag_decode(VVCLocalContext *lc, const int is_sbt_not_coded,
+    const int sub_tu_index, const int is_isp, const int is_chroma_coded)
+{ // Returns the luma coded flag of a TU: parsed, inferred as 1, or 0 when is_sbt_not_coded is set.
+    uint8_t tu_y_coded_flag = 0;
+    const VVCSPS *sps       = lc->fc->ps.sps;
+    CodingUnit *cu          = lc->cu;
+
+    if (!is_sbt_not_coded) {
+        int has_y_coded_flag = sub_tu_index < cu->num_intra_subpartitions - 1 || !lc->parse.infer_tu_cbf_luma; // ISP rule; replaced below when ISP is off
+        if (!is_isp) {
+            const int is_large = cu->cb_width > sps->max_tb_size_y || cu->cb_height > sps->max_tb_size_y;
+            has_y_coded_flag = (cu->pred_mode == MODE_INTRA && !cu->act_enabled_flag) || is_chroma_coded || is_large;
+        }
+        tu_y_coded_flag = has_y_coded_flag ? ff_vvc_tu_y_coded_flag(lc) : 1; // not signalled: inferred as coded
+    }
+    if (is_isp) // inference stays armed only while every ISP sub-TU so far had a zero flag
+        lc->parse.infer_tu_cbf_luma = lc->parse.infer_tu_cbf_luma && !tu_y_coded_flag;
+    return tu_y_coded_flag;
+}
+
+static void chroma_qp_offset_decode(VVCLocalContext *lc, const int is_128, const int is_chroma_coded)
+{ // Parse the CU-level chroma QP offset at most once (guarded by is_cu_chroma_qp_offset_coded) into lc->parse.chroma_qp_offset[].
+    const VVCPPS *pps   = lc->fc->ps.pps;
+    const VVCSH *sh     = &lc->sc->sh;
+
+    if ((is_128 || is_chroma_coded) &&
+        sh->cu_chroma_qp_offset_enabled_flag && !lc->parse.is_cu_chroma_qp_offset_coded) {
+        const int cu_chroma_qp_offset_flag = ff_vvc_cu_chroma_qp_offset_flag(lc);
+        if (cu_chroma_qp_offset_flag) {
+            int cu_chroma_qp_offset_idx = 0;
+            if (pps->chroma_qp_offset_list_len_minus1 > 0) // the index is only parsed when the list has more than one entry
+                cu_chroma_qp_offset_idx = ff_vvc_cu_chroma_qp_offset_idx(lc);
+            for (int i = CB - 1; i < JCBCR; i++) // copy the selected list entry for each chroma QP slot
+                lc->parse.chroma_qp_offset[i] = pps->chroma_qp_offset_list[cu_chroma_qp_offset_idx][i];
+        } else {
+            memset(lc->parse.chroma_qp_offset, 0, sizeof(lc->parse.chroma_qp_offset)); // flag is 0: no CU-level offsets
+        }
+        lc->parse.is_cu_chroma_qp_offset_coded = 1;
+    }
+}
+
+static int hls_transform_unit(VVCLocalContext *lc, int x0, int y0,int tu_width, int tu_height, int sub_tu_index, int ch_type)
+{ // Parse one TU at (x0, y0): coded flags, QP data and residuals. Returns 0, AVERROR(ENOMEM) if no TU can be allocated, or a callee's negative error.
+    VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps   = fc->ps.sps;
+    const VVCPPS *pps   = fc->ps.pps;
+    CodingUnit *cu      = lc->cu;
+    TransformUnit *tu   = add_tu(fc, cu, x0, y0, tu_width, tu_height);
+    const int min_cb_width      = pps->min_cb_width; // NOTE(review): not referenced by name below; presumably consumed by SAMPLE_CTB() - confirm
+    const VVCTreeType tree_type = cu->tree_type;
+    const int is_128            = cu->cb_width > 64 || cu->cb_height > 64;
+    const int is_isp            = cu->isp_split_type != ISP_NO_SPLIT;
+    const int is_isp_last_tu    = is_isp && (sub_tu_index == cu->num_intra_subpartitions - 1);
+    const int is_sbt_not_coded  = cu->sbt_flag &&
+        ((sub_tu_index == 0 && cu->sbt_pos_flag) || (sub_tu_index == 1 && !cu->sbt_pos_flag));
+    const int chroma_available  = tree_type != DUAL_TREE_LUMA && sps->chroma_format_idc &&
+        (!is_isp || is_isp_last_tu); // with ISP, chroma is carried by the last sub-TU only
+    int ret, xc, yc, wc, hc, is_chroma_coded;
+
+    if (!tu) // add_tu() only fails when the TU pool cannot provide a buffer
+        return AVERROR(ENOMEM);
+
+    if (tree_type == SINGLE_TREE && is_isp_last_tu) { // chroma geometry is read from the coding-block tables instead of this sub-TU
+        const int x_cu = x0 >> fc->ps.sps->min_cb_log2_size_y;
+        const int y_cu = y0 >> fc->ps.sps->min_cb_log2_size_y;
+        xc = SAMPLE_CTB(fc->tab.cb_pos_x[ch_type],  x_cu, y_cu);
+        yc = SAMPLE_CTB(fc->tab.cb_pos_y[ch_type],  x_cu, y_cu);
+        wc = SAMPLE_CTB(fc->tab.cb_width[ch_type],  x_cu, y_cu);
+        hc = SAMPLE_CTB(fc->tab.cb_height[ch_type], x_cu, y_cu);
+    } else {
+        xc = x0, yc = y0, wc = tu_width, hc = tu_height;
+    }
+
+    if (chroma_available && !is_sbt_not_coded) {
+        tu->coded_flag[CB] = ff_vvc_tu_cb_coded_flag(lc);
+        tu->coded_flag[CR] = ff_vvc_tu_cr_coded_flag(lc, tu->coded_flag[CB]);
+    }
+
+    is_chroma_coded = chroma_available && (tu->coded_flag[CB] || tu->coded_flag[CR]);
+
+    if (tree_type != DUAL_TREE_CHROMA) {
+        int has_qp_delta;
+        tu->coded_flag[LUMA] = tu_y_coded_flag_decode(lc, is_sbt_not_coded, sub_tu_index, is_isp, is_chroma_coded);
+        has_qp_delta = (is_128 || tu->coded_flag[LUMA] || is_chroma_coded) &&
+            pps->cu_qp_delta_enabled_flag && !lc->parse.is_cu_qp_delta_coded;
+        ret = set_qp_y(lc, x0, y0, has_qp_delta);
+        if (ret < 0)
+            return ret;
+        add_tb(tu, lc, x0, y0, tu_width, tu_height, LUMA);
+    }
+    if (tree_type != DUAL_TREE_LUMA) {
+        chroma_qp_offset_decode(lc, is_128, is_chroma_coded);
+        if (chroma_available) {
+            const int hs = sps->hshift[CHROMA];
+            const int vs = sps->vshift[CHROMA];
+            add_tb(tu, lc, xc, yc, wc >> hs, hc >> vs, CB);
+            add_tb(tu, lc, xc, yc, wc >> hs, hc >> vs, CR);
+        }
+    }
+    if (sps->joint_cbcr_enabled_flag && ((cu->pred_mode == MODE_INTRA &&
+        (tu->coded_flag[CB] || tu->coded_flag[CR])) ||
+        (tu->coded_flag[CB] && tu->coded_flag[CR])) &&
+        chroma_available) {
+        tu->joint_cbcr_residual_flag = ff_vvc_tu_joint_cbcr_residual_flag(lc, tu->coded_flag[1], tu->coded_flag[2]);
+    }
+
+    for (int i = 0; i < tu->nb_tbs; i++) { // per transform block: transform-skip flag, residuals, table updates
+        TransformBlock *tb  = &tu->tbs[i];
+        const int is_chroma = tb->c_idx != LUMA;
+        tb->has_coeffs = tu->coded_flag[tb->c_idx];
+        if (tb->has_coeffs && is_chroma) // CR carries no residual of its own when CB is coded and the joint flag is set
+            tb->has_coeffs = tb->c_idx == CB ? 1 : !(tu->coded_flag[CB] && tu->joint_cbcr_residual_flag);
+        if (tb->has_coeffs) {
+            tb->ts = cu->bdpcm_flag[tb->c_idx];
+            if (sps->transform_skip_enabled_flag && !cu->bdpcm_flag[tb->c_idx] &&
+                tb->tb_width <= sps->max_ts_size && tb->tb_height <= sps->max_ts_size &&
+                !cu->sbt_flag && (is_chroma || !is_isp)) {
+                tb->ts = ff_vvc_transform_skip_flag(lc, is_chroma);
+            }
+            ret = ff_vvc_residual_coding(lc, tb);
+            if (ret < 0)
+                return ret;
+            set_tb_tab(fc->tab.tu_coded_flag[tb->c_idx], tu->coded_flag[tb->c_idx], fc, tb);
+        }
+        if (tb->c_idx != CR) // set_tb_pos() keeps one table set for all non-luma blocks, so CB already covers CR
+            set_tb_pos(fc, tb);
+        if (tb->c_idx == CB)
+            set_tb_tab(fc->tab.tu_joint_cbcr_residual_flag, tu->joint_cbcr_residual_flag, fc, tb);
+    }
+
+    return 0;
+}
+
+static int hls_transform_tree(VVCLocalContext *lc, int x0, int y0,int tu_width, int tu_height, int ch_type)
+{ // Split the area at (x0, y0) into transform units (size limit, SBT or ISP) and parse each one; returns 0 or the first negative error.
+    const CodingUnit *cu = lc->cu;
+    const VVCSPS *sps = lc->fc->ps.sps;
+    int ret;
+
+    lc->parse.infer_tu_cbf_luma = 1;
+    if (cu->isp_split_type == ISP_NO_SPLIT && !cu->sbt_flag) {
+        if (tu_width > sps->max_tb_size_y || tu_height > sps->max_tb_size_y) { // too large for one transform: halve and recurse
+            const int ver_split_first = tu_width > sps->max_tb_size_y && tu_width > tu_height;
+            const int trafo_width  =  ver_split_first ? (tu_width  / 2) : tu_width;
+            const int trafo_height = !ver_split_first ? (tu_height / 2) : tu_height;
+
+            #define TRANSFORM_TREE(x, y) do {                                           \
+                ret = hls_transform_tree(lc, x, y, trafo_width, trafo_height, ch_type);  \
+                if (ret < 0)                                                            \
+                    return ret;                                                         \
+            } while (0)
+
+            TRANSFORM_TREE(x0, y0);
+            if (ver_split_first)
+                TRANSFORM_TREE(x0 + trafo_width, y0);
+            else
+                TRANSFORM_TREE(x0, y0 + trafo_height);
+            #undef TRANSFORM_TREE
+        } else {
+            ret = hls_transform_unit(lc, x0, y0, tu_width, tu_height, 0, ch_type);
+            if (ret < 0)
+                return ret;
+
+        }
+    } else if (cu->sbt_flag) { // SBT: two units, the first covering sbt_num_fourths_tb0 quarters of the block
+        if (!cu->sbt_horizontal_flag) {
+            #define TRANSFORM_UNIT(x, width, idx) do {                              \
+                ret = hls_transform_unit(lc, x, y0, width, tu_height, idx, ch_type); \
+                if (ret < 0)                                                        \
+                    return ret;                                                     \
+            } while (0)
+
+            const int trafo_width = tu_width * lc->parse.sbt_num_fourths_tb0 / 4;
+            TRANSFORM_UNIT(x0, trafo_width, 0);
+            TRANSFORM_UNIT(x0 + trafo_width, tu_width - trafo_width, 1);
+
+            #undef TRANSFORM_UNIT
+        } else {
+            #define TRANSFORM_UNIT(y, height, idx) do {                             \
+                ret = hls_transform_unit(lc, x0, y, tu_width, height, idx, ch_type); \
+                if (ret < 0)                                                        \
+                    return ret;                                                     \
+            } while (0)
+
+            const int trafo_height = tu_height * lc->parse.sbt_num_fourths_tb0 / 4;
+            TRANSFORM_UNIT(y0, trafo_height, 0);
+            TRANSFORM_UNIT(y0 + trafo_height, tu_height - trafo_height, 1);
+
+            #undef TRANSFORM_UNIT
+        }
+    } else if (cu->isp_split_type == ISP_HOR_SPLIT) { // ISP: num_intra_subpartitions equal horizontal stripes
+        const int trafo_height = tu_height / cu->num_intra_subpartitions;
+        for (int i = 0; i < cu->num_intra_subpartitions; i++) {
+            ret = hls_transform_unit(lc, x0, y0 + trafo_height * i, tu_width, trafo_height, i, 0); // NOTE(review): passes literal 0 rather than ch_type
+            if (ret < 0)
+                return ret;
+        }
+    } else if (cu->isp_split_type == ISP_VER_SPLIT) { // ISP: num_intra_subpartitions equal vertical stripes
+        const int trafo_width = tu_width / cu->num_intra_subpartitions;
+        for (int i = 0; i < cu->num_intra_subpartitions; i++) {
+            ret = hls_transform_unit(lc, x0 + trafo_width * i , y0, trafo_width, tu_height, i, 0);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int skipped_transform_tree(VVCLocalContext *lc, int x0, int y0,int tu_width, int tu_height)
+{ // Create the TUs and blocks of an area without parsing any residual data; returns 0 or AVERROR(ENOMEM) if a TU cannot be allocated.
+    VVCFrameContext *fc   = lc->fc;
+    const VVCSPS *sps           = fc->ps.sps;
+
+    if (tu_width > sps->max_tb_size_y || tu_height > sps->max_tb_size_y) { // too large for one transform: halve and recurse
+        const int ver_split_first = tu_width > sps->max_tb_size_y && tu_width > tu_height;
+        const int trafo_width  =  ver_split_first ? (tu_width  / 2) : tu_width;
+        const int trafo_height = !ver_split_first ? (tu_height / 2) : tu_height;
+        #define SKIPPED_TRANSFORM_TREE(x, y) do {                                   \
+            int ret = skipped_transform_tree(lc, x, y, trafo_width, trafo_height);  \
+            if (ret < 0)                                                            \
+                return ret;                                                         \
+        } while (0)
+
+        SKIPPED_TRANSFORM_TREE(x0, y0);
+        if (ver_split_first)
+            SKIPPED_TRANSFORM_TREE(x0 + trafo_width, y0);
+        else
+            SKIPPED_TRANSFORM_TREE(x0, y0 + trafo_height);
+        #undef SKIPPED_TRANSFORM_TREE
+    } else {
+        TransformUnit *tu = add_tu(fc, lc->cu, x0, y0, tu_width, tu_height);
+        const int c_end = sps->chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : (LUMA + 1); // luma only when chroma_format_idc is 0
+        if (!tu) // add_tu() only fails when the TU pool cannot provide a buffer
+            return AVERROR(ENOMEM);
+        for (int i = LUMA; i < c_end; i++) {
+            TransformBlock *tb = add_tb(tu, lc, x0, y0, tu_width >> sps->hshift[i], tu_height >> sps->vshift[i], i);
+            if (i != CR) // set_tb_pos() keeps one table set for all non-luma blocks, so CB already covers CR
+                set_tb_pos(fc, tb);
+        }
+    }
+
+    return 0;
+}
+
+//6.4.1 Allowed quad split process
+//6.4.2 Allowed binary split process
+//6.4.3 Allowed ternary split process
+static void can_split(const VVCLocalContext *lc, int x0, int y0,int cb_width, int cb_height,
+     int mtt_depth, int depth_offset, int part_idx, VVCSplitMode last_split_mode,
+     VVCTreeType tree_type, VVCModeType mode_type, VVCAllowedSplit* split)
+{ // Fill *split with 0/1 flags (qt, btv, bth, ttv, tth) telling which splits the block at (x0, y0) may use.
+    int min_qt_size, max_bt_size, max_tt_size, max_mtt_depth;
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSH *sh             = &lc->sc->sh;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPPS *pps           = fc->ps.pps;
+    const int chroma            = tree_type == DUAL_TREE_CHROMA;
+    int min_cb_size_y           = sps->min_cb_size_y;
+    int *qt                     = &split->qt;
+    int *btv                    = &split->btv;
+    int *bth                    = &split->bth;
+    int *ttv                    = &split->ttv;
+    int *tth                    = &split->tth;
+
+    *qt = *bth = *btv = *tth = *ttv = 1; // start with everything allowed; the rules below only ever clear flags
+
+    if (mtt_depth) // no quad split once mtt_depth is non-zero
+        *qt = 0;
+
+    min_qt_size = sh->min_qt_size[chroma];
+    if (cb_width <= min_qt_size)
+        *qt = 0;
+
+    if (chroma) { // extra limits for the chroma tree, measured after applying hshift[1]/vshift[1]
+        int chroma_area = (cb_width >> sps->hshift[1]) * (cb_height >> sps->vshift[1]);
+        int chroma_width = cb_width >> sps->hshift[1];
+
+        if (chroma_width == 8)
+            *ttv = 0;
+        else if (chroma_width <= 4) {
+            if (chroma_width == 4)
+                *btv = 0;
+            *qt = 0;
+        }
+        if (mode_type == MODE_TYPE_INTRA)
+            *qt = *btv = *bth = *ttv = *tth = 0;
+        if (chroma_area <= 32) {
+            *ttv = *tth = 0;
+            if (chroma_area <= 16)
+                *btv = *bth = 0;
+        }
+    }
+    max_bt_size = sh->max_bt_size[chroma];
+    max_tt_size = sh->max_tt_size[chroma];
+    max_mtt_depth = sh->max_mtt_depth[chroma] + depth_offset;
+
+    if (mode_type == MODE_TYPE_INTER) {
+        int area = cb_width * cb_height;
+        if (area == 32)
+            *btv = *bth = 0;
+        else if (area == 64)
+            *ttv = *tth = 0;
+    }
+    if (cb_width <= 2 * min_cb_size_y) { // limits relative to min_cb_size_y, horizontal direction
+        *ttv = 0;
+        if (cb_width <= min_cb_size_y)
+            *btv = 0;
+    }
+    if (cb_height <= 2 * min_cb_size_y) { // same limits, vertical direction
+        *tth = 0;
+        if (cb_height <= min_cb_size_y)
+            *bth = 0;
+    }
+    if (cb_width > max_bt_size || cb_height > max_bt_size)
+        *btv = *bth = 0;
+    max_tt_size = FFMIN(64, max_tt_size); // ternary splits are never allowed above 64
+    if (cb_width > max_tt_size || cb_height > max_tt_size)
+        *ttv = *tth = 0;
+    if (mtt_depth >= max_mtt_depth)
+        *btv = *bth = *ttv = *tth = 0;
+    if (x0 + cb_width > pps->width) { // block extends past pps->width
+        *ttv = *tth = 0;
+        if (cb_height > 64)
+            *btv = 0;
+        if (y0 + cb_height <= pps->height)
+            *bth = 0;
+        else if (cb_width > min_qt_size)
+            *btv = *bth = 0;
+    }
+    if (y0 + cb_height > pps->height) { // block extends past pps->height
+        *btv = *ttv = *tth = 0;
+        if (cb_width > 64)
+            *bth = 0;
+    }
+    if (mtt_depth > 0 && part_idx  == 1)  { // part 1 of a ternary split may not take a binary split in the same direction
+        if (last_split_mode == SPLIT_TT_VER)
+            *btv = 0;
+        else if (last_split_mode == SPLIT_TT_HOR)
+            *bth = 0;
+    }
+    if (cb_width <= 64 && cb_height > 64)
+        *btv = 0;
+    if (cb_width > 64 && cb_height <= 64)
+        *bth = 0;
+}
+
+static int get_num_intra_subpartitions(enum IspType isp_split_type, int cb_width, int cb_height)
+{ // Sub-partition count: 1 for ISP_NO_SPLIT, 2 for 4x8 and 8x4 blocks, otherwise 4.
+    const int is_4x8 = cb_width == 4 && cb_height == 8;
+    const int is_8x4 = cb_width == 8 && cb_height == 4;
+    if (isp_split_type == ISP_NO_SPLIT)
+        return 1;
+    return (is_4x8 || is_8x4) ? 2 : 4;
+}
+
+static int get_cclm_enabled(const VVCLocalContext *lc, const int x0, const int y0)
+{ // Returns non-zero when CCLM may be used for the block at (x0, y0), zero otherwise.
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps   = fc->ps.sps;
+    int enabled = 0;
+
+    if (!sps->cclm_enabled_flag)
+        return 0;
+    if (!sps->qtbtt_dual_tree_intra_flag || !IS_I(&lc->sc->sh) || sps->ctb_log2_size_y < 6) // no further checks outside dual-tree I slices or for CTBs below 64
+        return 1;
+    else {
+        const int x64 = x0 >> 6 << 6; // position rounded down to a multiple of 64
+        const int y64 = y0 >> 6 << 6;
+        const int y32 = y0 >> 5 << 5; // vertical position rounded down to a multiple of 32
+        const int x64_cu = x64 >> fc->ps.sps->min_cb_log2_size_y;
+        const int y64_cu = y64 >> fc->ps.sps->min_cb_log2_size_y;
+        const int y32_cu = y32 >> fc->ps.sps->min_cb_log2_size_y;
+        const int min_cb_width = fc->ps.pps->min_cb_width; // NOTE(review): not referenced by name below; presumably consumed by SAMPLE_CTB() - confirm
+        const int depth = SAMPLE_CTB(fc->tab.cqt_depth[1], x64_cu, y64_cu);
+        const int min_depth = fc->ps.sps->ctb_log2_size_y - 6;
+        const VVCSplitMode msm64 = (VVCSplitMode)TAB_MSM(fc, 0, x64, y64);
+        const VVCSplitMode msm32 = (VVCSplitMode)TAB_MSM(fc, 1, x64, y32);
+
+        enabled = SAMPLE_CTB(fc->tab.cb_width[1], x64_cu, y64_cu) == 64 && // table set 1: the 64x64 area is one unsplit block
+            SAMPLE_CTB(fc->tab.cb_height[1], x64_cu, y64_cu) == 64;
+        enabled |= depth == min_depth && msm64 == SPLIT_BT_HOR && // ... or it was split horizontally into 64x32 blocks
+            SAMPLE_CTB(fc->tab.cb_width[1], x64_cu, y32_cu) == 64 &&
+            SAMPLE_CTB(fc->tab.cb_height[1], x64_cu, y32_cu) == 32;
+        enabled |= depth > min_depth;
+        enabled |= depth == min_depth && msm64 == SPLIT_BT_HOR && msm32 == SPLIT_BT_VER;
+
+        if (enabled) { // table set 0 can still veto the decision
+            const int w = SAMPLE_CTB(fc->tab.cb_width[0], x64_cu, y64_cu);
+            const int h = SAMPLE_CTB(fc->tab.cb_height[0], x64_cu, y64_cu);
+            const int depth0 = SAMPLE_CTB(fc->tab.cqt_depth[0], x64_cu, y64_cu);
+            if ((w == 64 && h == 64 && TAB_ISPMF(fc, x64, y64)) ||
+                ((w < 64 || h < 64) && depth0 == min_depth))
+                return 0;
+        }
+
+    }
+
+    return enabled;
+}
+
+static int less(const void *a, const void *b)
+{ // qsort() comparator for int, ascending; returns -1, 0 or 1 and compares rather than subtracts so it cannot overflow
+    return (*(const int *)a > *(const int *)b) - (*(const int *)a < *(const int *)b);
+}
+
+//8.4.2 Derivation process for luma intra prediction mode
+static enum IntraPredMode luma_intra_pred_mode(VVCLocalContext* lc, const int intra_subpartitions_mode_flag)
+{ // Parse the luma intra mode syntax of the current CU and return the resulting prediction mode.
+    VVCFrameContext *fc     = lc->fc;
+    CodingUnit *cu          = lc->cu;
+    const int x0            = cu->x0;
+    const int y0            = cu->y0;
+    enum IntraPredMode pred;
+    int intra_luma_not_planar_flag = 1;
+    int intra_luma_mpm_remainder = 0;
+    int intra_luma_mpm_flag = 1;
+    int intra_luma_mpm_idx = 0;
+
+    if (!cu->intra_luma_ref_idx) // with a non-zero reference line index the flag is not parsed and stays 1
+        intra_luma_mpm_flag = ff_vvc_intra_luma_mpm_flag(lc);
+    if (intra_luma_mpm_flag) {
+        if (!cu->intra_luma_ref_idx)
+            intra_luma_not_planar_flag = ff_vvc_intra_luma_not_planar_flag(lc, intra_subpartitions_mode_flag);
+        if (intra_luma_not_planar_flag)
+            intra_luma_mpm_idx = ff_vvc_intra_luma_mpm_idx(lc);
+    } else {
+        intra_luma_mpm_remainder = ff_vvc_intra_luma_mpm_remainder(lc);
+    }
+
+    if (!intra_luma_not_planar_flag) {
+        pred = INTRA_PLANAR;
+    } else {
+        const VVCSPS *sps       = fc->ps.sps;
+        const int x_a           = (x0 - 1) >> sps->min_cb_log2_size_y; // neighbour A: left of the bottom-left sample
+        const int y_a           = (y0 + cu->cb_height - 1) >> sps->min_cb_log2_size_y;
+        const int x_b           = (x0 + cu->cb_width - 1) >> sps->min_cb_log2_size_y; // neighbour B: above the top-right sample
+        const int y_b           = (y0 - 1) >> sps->min_cb_log2_size_y;
+        int min_cb_width        = fc->ps.pps->min_cb_width; // NOTE(review): not referenced by name below; presumably consumed by SAMPLE_CTB() - confirm
+        int x0b                 = av_mod_uintp2(x0, sps->ctb_log2_size_y);
+        int y0b                 = av_mod_uintp2(y0, sps->ctb_log2_size_y);
+        const int available_l   = lc->ctb_left_flag || x0b;
+        const int available_u   = lc->ctb_up_flag || y0b;
+
+        int a, b, cand[5]; // a/b: modes taken from neighbours A/B; cand: the five candidate modes
+
+       if (!available_l || (SAMPLE_CTB(fc->tab.cpm[0], x_a, y_a) != MODE_INTRA) ||
+            SAMPLE_CTB(fc->tab.imf, x_a, y_a)) {
+            a = INTRA_PLANAR;
+        } else {
+            a = SAMPLE_CTB(fc->tab.ipm, x_a, y_a);
+        }
+
+        if (!available_u || (SAMPLE_CTB(fc->tab.cpm[0], x_b, y_b) != MODE_INTRA) ||
+            SAMPLE_CTB(fc->tab.imf, x_b, y_b) || !y0b) { // !y0b: a neighbour in the CTB row above is not used
+            b = INTRA_PLANAR;
+        } else {
+            b = SAMPLE_CTB(fc->tab.ipm, x_b, y_b);
+        }
+
+        if (a == b && a > INTRA_DC) { // both neighbours share one mode above INTRA_DC
+            cand[0] = a;
+            cand[1] = 2 + ((a + 61) % 64);
+            cand[2] = 2 + ((a -  1) % 64);
+            cand[3] = 2 + ((a + 60) % 64);
+            cand[4] = 2 + (a % 64);
+        } else {
+            const int minab = FFMIN(a, b);
+            const int maxab = FFMAX(a, b);
+            if (a > INTRA_DC && b > INTRA_DC) { // two different modes above INTRA_DC
+                const int diff = maxab - minab;
+                cand[0] = a;
+                cand[1] = b;
+                if (diff == 1) {
+                    cand[2] = 2 + ((minab + 61) % 64);
+                    cand[3] = 2 + ((maxab - 1) % 64);
+                    cand[4] = 2 + ((minab + 60) % 64);
+                } else if (diff >= 62) {
+                    cand[2] = 2 + ((minab - 1) % 64);
+                    cand[3] = 2 + ((maxab + 61) % 64);
+                    cand[4] = 2 + (minab % 64);
+                } else if (diff == 2) {
+                    cand[2] = 2 + ((minab - 1) % 64);
+                    cand[3] = 2 + ((minab + 61) % 64);
+                    cand[4] = 2 + ((maxab - 1) % 64);
+                } else {
+                    cand[2] = 2 + ((minab + 61) % 64);
+                    cand[3] = 2 + ((minab - 1) % 64);
+                    cand[4] = 2 + ((maxab + 61) % 64);
+                }
+            } else if (a > INTRA_DC || b > INTRA_DC) { // exactly one neighbour mode is above INTRA_DC
+                cand[0] = maxab;
+                cand[1] = 2 + ((maxab + 61 ) % 64);
+                cand[2] = 2 + ((maxab - 1) % 64);
+                cand[3] = 2 + ((maxab + 60 ) % 64);
+                cand[4] = 2 + (maxab % 64);
+            } else { // neither neighbour mode is above INTRA_DC: fixed default list
+                cand[0] = INTRA_DC;
+                cand[1] = INTRA_VERT;
+                cand[2] = INTRA_HORZ;
+                cand[3] = INTRA_VERT - 4;
+                cand[4] = INTRA_VERT + 4;
+            }
+        }
+        if (intra_luma_mpm_flag) {
+            pred = cand[intra_luma_mpm_idx];
+        } else { // remainder path: sort the candidates ascending, then step the remainder over each candidate it reaches
+            qsort(cand, FF_ARRAY_ELEMS(cand), sizeof(cand[0]), less);
+            pred = intra_luma_mpm_remainder + 1;
+            for (int i = 0; i < FF_ARRAY_ELEMS(cand); i++) {
+                if (pred >= cand[i])
+                    pred++;
+            }
+        }
+    }
+    return pred;
+}
+
+static int lfnst_idx_decode(VVCLocalContext *lc)
+{ // Decide whether lfnst_idx is present for the current CU, parse it if so, and set cu->apply_lfnst_flag[]; returns the index (0 when absent).
+    CodingUnit  *cu             = lc->cu;
+    const VVCTreeType tree_type = cu->tree_type;
+    const VVCSPS *sps           = lc->fc->ps.sps;
+    const int cb_width          = cu->cb_width;
+    const int cb_height         = cu->cb_height;
+    const TransformUnit  *tu    = cu->tus.head;
+    int lfnst_width, lfnst_height, min_lfnst;
+    int lfnst_idx = 0;
+
+    memset(cu->apply_lfnst_flag, 0, sizeof(cu->apply_lfnst_flag));
+
+    if (!sps->lfnst_enabled_flag || cu->pred_mode != MODE_INTRA || FFMAX(cb_width, cb_height) > sps->max_tb_size_y)
+        return 0;
+
+    while (tu) { // any coded transform-skip block in the CU rules LFNST out
+        for (int j = 0; j < tu->nb_tbs; j++) {
+            const TransformBlock *tb = tu->tbs + j;
+            if (tu->coded_flag[tb->c_idx] && tb->ts)
+                return 0;
+        }
+        tu = tu->next;
+    }
+
+    if (tree_type == DUAL_TREE_CHROMA) {
+        lfnst_width  = cb_width  >> sps->hshift[1];
+        lfnst_height = cb_height >> sps->vshift[1];
+    } else { // with ISP the size of one sub-partition is what counts
+        const int vs = cu->isp_split_type == ISP_VER_SPLIT;
+        const int hs = cu->isp_split_type == ISP_HOR_SPLIT;
+        lfnst_width = vs ? cb_width / cu->num_intra_subpartitions : cb_width;
+        lfnst_height = hs ? cb_height / cu->num_intra_subpartitions : cb_height;
+    }
+    min_lfnst = FFMIN(lfnst_width, lfnst_height);
+    if (tree_type != DUAL_TREE_CHROMA && cu->intra_mip_flag && min_lfnst < 16)
+        return 0;
+
+    if (min_lfnst >= 4) {
+        if ((cu->isp_split_type != ISP_NO_SPLIT || !lc->parse.lfnst_dc_only) && lc->parse.lfnst_zero_out_sig_coeff_flag)
+            lfnst_idx = ff_vvc_lfnst_idx(lc, tree_type != SINGLE_TREE);
+    }
+
+    if (lfnst_idx) { // applied to chroma only in the dual chroma tree, to luma only otherwise
+        cu->apply_lfnst_flag[LUMA] = tree_type != DUAL_TREE_CHROMA;
+        cu->apply_lfnst_flag[CB] = cu->apply_lfnst_flag[CR] = tree_type == DUAL_TREE_CHROMA;
+    }
+
+    return lfnst_idx;
+}
+
+/*
+ * Parse mts_idx for the current coding unit when every presence condition
+ * holds; in all other cases the result is MTS_DCT2_DCT2.
+ */
+static MtsIdx mts_idx_decode(VVCLocalContext *lc)
+{
+    const CodingUnit *cu = lc->cu;
+    const VVCSPS *sps    = lc->fc->ps.sps;
+    // fix me (kept from the original): only the first block of the first TU is inspected
+    const uint8_t ts     = cu->tus.head->tbs[0].ts;
+    const int longest    = FFMAX(cu->cb_width, cu->cb_height);
+    int explicit_mts;
+
+    if (cu->tree_type == DUAL_TREE_CHROMA || cu->lfnst_idx || ts || longest > 32)
+        return MTS_DCT2_DCT2;
+    if (cu->isp_split_type != ISP_NO_SPLIT || cu->sbt_flag)
+        return MTS_DCT2_DCT2;
+    if (!lc->parse.mts_zero_out_sig_coeff_flag || lc->parse.mts_dc_only)
+        return MTS_DCT2_DCT2;
+
+    // explicit MTS has separate SPS switches for inter and intra CUs
+    explicit_mts = (cu->pred_mode == MODE_INTER && sps->explicit_mts_inter_enabled_flag) ||
+                   (cu->pred_mode == MODE_INTRA && sps->explicit_mts_intra_enabled_flag);
+    if (!explicit_mts)
+        return MTS_DCT2_DCT2;
+
+    return ff_vvc_mts_idx(lc);
+}
+
+/*
+ * Look up the luma intra prediction mode stored for the centre sample of cu.
+ *
+ * MIP blocks report INTRA_PLANAR, IBC and palette blocks report INTRA_DC;
+ * otherwise the stored luma mode is returned as is.
+ */
+static enum IntraPredMode derive_center_luma_intra_pred_mode(const VVCFrameContext *fc, const VVCSPS *sps, const VVCPPS *pps, const CodingUnit *cu)
+{
+    // NOTE(review): SAMPLE_CTB() presumably picks up the local named min_cb_width - confirm
+    const int min_cb_width  = pps->min_cb_width;
+    const int log2_min_cb   = sps->min_cb_log2_size_y;
+    const int xc            = (cu->x0 + cu->cb_width  / 2) >> log2_min_cb;
+    const int yc            = (cu->y0 + cu->cb_height / 2) >> log2_min_cb;
+    const int is_mip        = SAMPLE_CTB(fc->tab.imf, xc, yc);
+    const int center_mode   = SAMPLE_CTB(fc->tab.cpm[0], xc, yc);
+    const int center_ipm    = SAMPLE_CTB(fc->tab.ipm, xc, yc);
+
+    if (is_mip)
+        return INTRA_PLANAR;
+
+    switch (center_mode) {
+    case MODE_IBC:
+    case MODE_PLT:
+        return INTRA_DC;
+    default:
+        return center_ipm;
+    }
+}
+
+/*
+ * Derive cu->intra_pred_mode_c (and cu->mip_chroma_direct_flag) from the
+ * parsed chroma syntax elements.
+ *
+ * cclm_mode_flag / cclm_mode_idx select one of the CCLM modes starting at
+ * INTRA_LT_CCLM; intra_chroma_pred_mode == 4 copies the luma mode; values
+ * 0..3 go through the pred_mode_c table. For 4:2:2 content, modes up to
+ * INTRA_VDIAG are finally remapped through mode_map_422.
+ */
+static void derive_chroma_intra_pred_mode(VVCLocalContext *lc,
+    const int cclm_mode_flag, const int cclm_mode_idx, const int intra_chroma_pred_mode)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    CodingUnit *cu              = lc->cu;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPPS *pps           = fc->ps.pps;
+    const int x_cb              = cu->x0 >> sps->min_cb_log2_size_y;
+    const int y_cb              = cu->y0 >> sps->min_cb_log2_size_y;
+    // NOTE(review): min_cb_width is presumably consumed by SAMPLE_CTB() - confirm
+    const int min_cb_width      = pps->min_cb_width;
+    const int intra_mip_flag    = SAMPLE_CTB(fc->tab.imf, x_cb, y_cb);
+    enum IntraPredMode luma_intra_pred_mode = SAMPLE_CTB(fc->tab.ipm, x_cb, y_cb);
+
+    // 4:4:4 single tree with a MIP luma block: chroma reuses the luma mode stored at the CU origin
+    if (cu->tree_type == SINGLE_TREE && sps->chroma_format_idc == CHROMA_FORMAT_444 &&
+        intra_chroma_pred_mode == 4 && intra_mip_flag) {
+        cu->mip_chroma_direct_flag = 1;
+        cu->intra_pred_mode_c = luma_intra_pred_mode;
+        return;
+    }
+    // from here on the luma mode is taken at the centre of the coding block
+    luma_intra_pred_mode = derive_center_luma_intra_pred_mode(fc, sps, pps, cu);
+
+    if (cu->act_enabled_flag) {
+        cu->intra_pred_mode_c = luma_intra_pred_mode;
+        return;
+    }
+    if (cclm_mode_flag) {
+        cu->intra_pred_mode_c = INTRA_LT_CCLM + cclm_mode_idx;
+    } else if (intra_chroma_pred_mode == 4){
+        cu->intra_pred_mode_c = luma_intra_pred_mode;
+    } else {
+        // row: intra_chroma_pred_mode (0..3); column: position of the luma mode in
+        // modes[], or 4 when the luma mode is none of them
+        static const IntraPredMode pred_mode_c[][4 + 1] = {
+            {INTRA_VDIAG, INTRA_PLANAR, INTRA_PLANAR, INTRA_PLANAR, INTRA_PLANAR},
+            {INTRA_VERT,  INTRA_VDIAG,  INTRA_VERT,   INTRA_VERT,   INTRA_VERT},
+            {INTRA_HORZ,  INTRA_HORZ,   INTRA_VDIAG,  INTRA_HORZ,   INTRA_HORZ},
+            {INTRA_DC,    INTRA_DC,     INTRA_DC,     INTRA_VDIAG,  INTRA_DC},
+        };
+        static const int modes[4] = {INTRA_PLANAR, INTRA_VERT, INTRA_HORZ, INTRA_DC};
+        int idx;
+
+        // This workaround is necessary to have 4:4:4 video decode correctly
+        // See VVC ticket https://jvet.hhi.fraunhofer.de/trac/vvc/ticket/1602
+        // and VTM source https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM/-/blob/master/source/Lib/CommonLib/UnitTools.cpp#L736
+        if (cu->tree_type == SINGLE_TREE && sps->chroma_format_idc == CHROMA_FORMAT_444 && intra_mip_flag) {
+            idx = 4;
+        } else {
+            // leaves idx == 4 when the luma mode matches none of the four entries
+            for (idx = 0; idx < FF_ARRAY_ELEMS(modes); idx++) {
+                if (modes[idx] == luma_intra_pred_mode)
+                    break;
+            }
+        }
+
+        cu->intra_pred_mode_c = pred_mode_c[intra_chroma_pred_mode][idx];
+    }
+    if (sps->chroma_format_idc == CHROMA_FORMAT_422 && cu->intra_pred_mode_c <= INTRA_VDIAG) {
+        // 4:2:2 remapping, indexed by the mode derived above
+        static const int mode_map_422[INTRA_VDIAG + 1] = {
+             0,  1, 61, 62, 63, 64, 65, 66,  2,  3,  5,  6,  8, 10, 12, 13,
+            14, 16, 18, 20, 22, 23, 24, 26, 28, 30, 31, 33, 34, 35, 36, 37,
+            38, 39, 40, 41, 41, 42, 43, 43, 44, 44, 45, 45, 46, 47, 48, 48,
+            49, 49, 50, 51, 51, 52, 52, 53, 54, 55, 55, 56, 56, 57, 57, 58,
+            59, 59, 60,
+        };
+        cu->intra_pred_mode_c = mode_map_422[cu->intra_pred_mode_c];
+    }
+}
+
+/*
+ * Parse the luma intra prediction syntax of the current coding unit and
+ * store the resulting mode in cu->intra_pred_mode_y and in fc->tab.ipm.
+ *
+ * Three mutually exclusive paths are handled: BDPCM, MIP, and regular
+ * intra prediction (with optional multi reference line and ISP).
+ */
+static void intra_luma_pred_modes(VVCLocalContext *lc)
+{
+    VVCFrameContext *fc             = lc->fc;
+    const VVCSPS *sps               = fc->ps.sps;
+    const VVCPPS *pps               = fc->ps.pps;
+    CodingUnit *cu                  = lc->cu;
+    const int log2_min_cb_size      = sps->min_cb_log2_size_y;
+    const int x0                    = cu->x0;
+    const int y0                    = cu->y0;
+    const int x_cb                  = x0 >> log2_min_cb_size;
+    const int y_cb                  = y0 >> log2_min_cb_size;
+    const int cb_width              = cu->cb_width;
+    const int cb_height             = cu->cb_height;
+
+    cu->intra_luma_ref_idx  = 0;
+    // the BDPCM flag is only parsed for blocks within the transform-skip size limit
+    if (sps->bdpcm_enabled_flag && cb_width <= sps->max_ts_size && cb_height <= sps->max_ts_size)
+        cu->bdpcm_flag[LUMA] = ff_vvc_intra_bdpcm_luma_flag(lc);
+    if (cu->bdpcm_flag[LUMA]) {
+        cu->intra_pred_mode_y = ff_vvc_intra_bdpcm_luma_dir_flag(lc) ? INTRA_VERT : INTRA_HORZ;
+    } else {
+        if (sps->mip_enabled_flag)
+            cu->intra_mip_flag = ff_vvc_intra_mip_flag(lc, fc->tab.imf);
+        if (cu->intra_mip_flag) {
+            int intra_mip_transposed_flag = ff_vvc_intra_mip_transposed_flag(lc);
+            int intra_mip_mode = ff_vvc_intra_mip_mode(lc);
+            int x = y_cb * pps->min_cb_width + x_cb;
+            // imf is filled for the whole block; imtf/imm are written only in the
+            // first column of each row - NOTE(review): confirm readers only use that column
+            for (int y = 0; y < (cb_height>>log2_min_cb_size); y++) {
+                int width = cb_width>>log2_min_cb_size;
+                memset(&fc->tab.imf[x],  cu->intra_mip_flag, width);
+                fc->tab.imtf[x] = intra_mip_transposed_flag;
+                fc->tab.imm[x]  = intra_mip_mode;
+                x += pps->min_cb_width;
+            }
+            cu->intra_pred_mode_y = intra_mip_mode;
+        } else {
+            int intra_subpartitions_mode_flag = 0;
+            // the reference line index is not parsed for blocks on the first row of a CTB
+            if (sps->mrl_enabled_flag && ((y0 % sps->ctb_size_y) > 0))
+                cu->intra_luma_ref_idx = ff_vvc_intra_luma_ref_idx(lc);
+            if (sps->isp_enabled_flag && !cu->intra_luma_ref_idx &&
+                (cb_width <= sps->max_tb_size_y && cb_height <= sps->max_tb_size_y) &&
+                (cb_width * cb_height > MIN_TU_SIZE * MIN_TU_SIZE) &&
+                !cu->act_enabled_flag)
+                intra_subpartitions_mode_flag = ff_vvc_intra_subpartitions_mode_flag(lc);
+            // the ISP flag table has one entry per 64x64 area; it is written only
+            // when this CU starts exactly on such a boundary
+            if (!(x0 & 63) && !(y0 & 63))
+                TAB_ISPMF(fc, x0, y0) = intra_subpartitions_mode_flag;
+            cu->isp_split_type = ff_vvc_isp_split_type(lc, intra_subpartitions_mode_flag);
+            cu->num_intra_subpartitions = get_num_intra_subpartitions(cu->isp_split_type, cb_width, cb_height);
+            cu->intra_pred_mode_y = luma_intra_pred_mode(lc, intra_subpartitions_mode_flag);
+        }
+    }
+    set_cb_tab(lc, fc->tab.ipm, cu->intra_pred_mode_y);
+}
+
+/*
+ * Parse the chroma intra prediction syntax of the current coding unit:
+ * either the BDPCM flag plus direction, or the CCLM / chroma mode elements
+ * that are handed on to derive_chroma_intra_pred_mode().
+ */
+static void intra_chroma_pred_modes(VVCLocalContext *lc)
+{
+    const VVCSPS *sps           = lc->fc->ps.sps;
+    CodingUnit *cu              = lc->cu;
+    const int chroma_w          = cu->cb_width  >> sps->hshift[CHROMA];
+    const int chroma_h          = cu->cb_height >> sps->vshift[CHROMA];
+    int cclm_mode_flag;
+    int cclm_mode_idx           = 0;
+    int intra_chroma_pred_mode  = 0;
+
+    cu->mip_chroma_direct_flag = 0;
+
+    // BDPCM is limited to chroma blocks within the transform-skip size limit
+    if (sps->bdpcm_enabled_flag && chroma_w <= sps->max_ts_size && chroma_h <= sps->max_ts_size) {
+        const int bdpcm = ff_vvc_intra_bdpcm_chroma_flag(lc);
+        cu->bdpcm_flag[CR] = bdpcm;
+        cu->bdpcm_flag[CB] = cu->bdpcm_flag[CR];
+    }
+
+    if (cu->bdpcm_flag[CHROMA]) {
+        cu->intra_pred_mode_c = ff_vvc_intra_bdpcm_chroma_dir_flag(lc) ? INTRA_VERT : INTRA_HORZ;
+        return;
+    }
+
+    cclm_mode_flag = get_cclm_enabled(lc, cu->x0, cu->y0) ? ff_vvc_cclm_mode_flag(lc) : 0;
+    if (cclm_mode_flag)
+        cclm_mode_idx = ff_vvc_cclm_mode_idx(lc);
+    else
+        intra_chroma_pred_mode = ff_vvc_intra_chroma_pred_mode(lc);
+
+    derive_chroma_intra_pred_mode(lc, cclm_mode_flag, cclm_mode_idx, intra_chroma_pred_mode);
+}
+
+/*
+ * Parse cu_skip_flag, pred_mode_flag and pred_mode_ibc_flag (or infer them
+ * when absent) and return the resulting prediction mode.
+ *
+ * Side effect: cu->skip_flag is always written.
+ */
+static PredMode pred_mode_decode(VVCLocalContext *lc,
+                                 const VVCTreeType tree_type,
+                                 const VVCModeType mode_type)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    CodingUnit *cu              = lc->cu;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCSH *sh             = &lc->sc->sh;
+    const int ch_type = tree_type == DUAL_TREE_CHROMA ? 1 : 0;
+    const int is_4x4 = cu->cb_width == 4 && cu->cb_height == 4;
+    int pred_mode_flag;
+    int pred_mode_ibc_flag;
+    PredMode pred_mode;
+
+    cu->skip_flag = 0;
+    if (!IS_I(sh) || sps->ibc_enabled_flag) {
+        const int is_128 = cu->cb_width == 128 || cu->cb_height == 128;
+        // cu_skip_flag is never parsed for the chroma tree
+        if (tree_type != DUAL_TREE_CHROMA &&
+            ((!is_4x4 && mode_type != MODE_TYPE_INTRA) ||
+            (sps->ibc_enabled_flag && !is_128))) {
+            cu->skip_flag = ff_vvc_cu_skip_flag(lc, fc->tab.skip);
+        }
+
+        // pred_mode_flag: inferred in the first two cases, parsed otherwise
+        if (is_4x4 || mode_type == MODE_TYPE_INTRA || IS_I(sh)) {
+            pred_mode_flag = 1;
+        } else if (mode_type == MODE_TYPE_INTER || cu->skip_flag) {
+            pred_mode_flag = 0;
+        } else  {
+            pred_mode_flag = ff_vvc_pred_mode_flag(lc, ch_type);
+        }
+        pred_mode = pred_mode_flag ? MODE_INTRA : MODE_INTER;
+
+        // pred_mode_ibc_flag: parsed in the first branch, inferred in the remaining ones
+        if (((IS_I(sh) && !cu->skip_flag) ||
+            (!IS_I(sh) && (pred_mode != MODE_INTRA ||
+            ((is_4x4 || mode_type == MODE_TYPE_INTRA) && !cu->skip_flag)))) &&
+            !is_128 && mode_type != MODE_TYPE_INTER && sps->ibc_enabled_flag &&
+            tree_type != DUAL_TREE_CHROMA) {
+            pred_mode_ibc_flag = ff_vvc_pred_mode_ibc_flag(lc, ch_type);
+        } else if (cu->skip_flag && (is_4x4 || mode_type == MODE_TYPE_INTRA)) {
+            pred_mode_ibc_flag = 1;
+        } else if (is_128 || mode_type == MODE_TYPE_INTER || tree_type == DUAL_TREE_CHROMA) {
+            pred_mode_ibc_flag = 0;
+        } else {
+            pred_mode_ibc_flag = (IS_I(sh)) ? sps->ibc_enabled_flag : 0;
+        }
+        // IBC overrides whatever pred_mode_flag selected
+        if (pred_mode_ibc_flag)
+            pred_mode = MODE_IBC;
+    } else {
+        // reached only for I slices with IBC disabled (see the enclosing condition)
+        pred_mode_flag = is_4x4 || mode_type == MODE_TYPE_INTRA ||
+            mode_type != MODE_TYPE_INTER || IS_I(sh);
+        pred_mode = pred_mode_flag ? MODE_INTRA : MODE_INTER;
+    }
+    return pred_mode;
+}
+
+/*
+ * Parse the sub-block transform (SBT) syntax of the current coding unit.
+ *
+ * Nothing is touched unless the CU is inter coded, SBT is enabled, CIIP is
+ * off and the block fits into the maximum transform size. Otherwise
+ * cu->sbt_flag is written, and when it is set so are
+ * cu->sbt_horizontal_flag, cu->sbt_pos_flag and
+ * lc->parse.sbt_num_fourths_tb0.
+ */
+static void sbt_info(VVCLocalContext *lc, const VVCSPS *sps)
+{
+    CodingUnit *cu      = lc->cu;
+    const int w         = cu->cb_width;
+    const int h         = cu->cb_height;
+    const int half_ver  = w >= 8;
+    const int half_hor  = h >= 8;
+    const int quad_ver  = w >= 16;
+    const int quad_hor  = h >= 16;
+    int quad            = 0;
+    int allow_ver, allow_hor, sbt_min;
+
+    if (cu->pred_mode != MODE_INTER || !sps->sbt_enabled_flag || cu->ciip_flag ||
+        w > sps->max_tb_size_y || h > sps->max_tb_size_y)
+        return;
+
+    cu->sbt_flag = 0;
+    if (half_ver || half_hor)
+        cu->sbt_flag = ff_vvc_sbt_flag(lc);
+    if (!cu->sbt_flag)
+        return;
+
+    if ((half_ver || half_hor) && (quad_ver || quad_hor))
+        quad = ff_vvc_sbt_quad_flag(lc);
+
+    // the direction is parsed only when both splits are possible for the chosen size
+    allow_ver = quad ? quad_ver : half_ver;
+    allow_hor = quad ? quad_hor : half_hor;
+    if (allow_ver && allow_hor)
+        cu->sbt_horizontal_flag = ff_vvc_sbt_horizontal_flag(lc);
+    else
+        cu->sbt_horizontal_flag = allow_hor;
+
+    cu->sbt_pos_flag = ff_vvc_sbt_pos_flag(lc);
+
+    // size of the first transform block in fourths of the coding block
+    sbt_min = quad ? 1 : 2;
+    lc->parse.sbt_num_fourths_tb0 = cu->sbt_pos_flag ? (4 - sbt_min) : sbt_min;
+}
+
+/*
+ * Set the luma and chroma QP for the current coding unit and run
+ * skipped_transform_tree() over the whole coding block.
+ *
+ * Returns 0 on success or the negative error code from skipped_transform_tree().
+ */
+static int skipped_transform_tree_unit(VVCLocalContext *lc)
+{
+    const CodingUnit *cu = lc->cu;
+    int err;
+
+    set_qp_y(lc, cu->x0, cu->y0, 0);
+    set_qp_c(lc);
+
+    err = skipped_transform_tree(lc, cu->x0, cu->y0, cu->cb_width, cu->cb_height);
+    // positive results are folded into 0, only errors propagate
+    return err < 0 ? err : 0;
+}
+
+/*
+ * Record position, size and quad-tree depth of cu in the per-frame tables
+ * for every minimum-size coding block it covers.
+ */
+static void set_cb_pos(const VVCFrameContext *fc, const CodingUnit *cu)
+{
+    const int log2_min_cb = fc->ps.sps->min_cb_log2_size_y;
+    const int stride      = fc->ps.pps->min_cb_width;
+    const int ch_type     = cu->ch_type;
+    const int cols        = cu->cb_width  >> log2_min_cb;
+    const int rows        = cu->cb_height >> log2_min_cb;
+    int off               = (cu->y0 >> log2_min_cb) * stride + (cu->x0 >> log2_min_cb);
+
+    for (int row = 0; row < rows; row++, off += stride) {
+        // the position tables are filled element by element, the rest with memset()
+        for (int col = 0; col < cols; col++) {
+            fc->tab.cb_pos_x[ch_type][off + col] = cu->x0;
+            fc->tab.cb_pos_y[ch_type][off + col] = cu->y0;
+        }
+        memset(fc->tab.cb_width[ch_type]  + off, cu->cb_width,  cols);
+        memset(fc->tab.cb_height[ch_type] + off, cu->cb_height, cols);
+        memset(fc->tab.cqt_depth[ch_type] + off, cu->cqt_depth, cols);
+    }
+}
+
+/*
+ * Take a CodingUnit from fc->cu_pool and append it to the CU list: after
+ * lc->cu when one exists, otherwise as the first CU of the CTU that
+ * contains (x0, y0). lc->cu is updated to the new unit.
+ *
+ * Returns the new unit, or NULL when the pool cannot provide a buffer.
+ */
+static CodingUnit* alloc_cu(VVCLocalContext *lc, const int x0, const int y0)
+{
+    VVCFrameContext *fc     = lc->fc;
+    const int log2_ctb      = fc->ps.sps->ctb_log2_size_y;
+    const int ctu_idx       = (y0 >> log2_ctb) * fc->ps.pps->ctb_width + (x0 >> log2_ctb);
+    AVBufferRef *ref        = av_buffer_pool_get(fc->cu_pool);
+    CodingUnit *unit;
+
+    if (!ref)
+        return NULL;
+
+    // the unit lives in the pool buffer and keeps the reference to it
+    unit       = (CodingUnit *)ref->data;
+    unit->buf  = ref;
+    unit->next = NULL;
+
+    if (!lc->cu)
+        fc->tab.ctus[ctu_idx].cus = unit;
+    else
+        lc->cu->next = unit;
+    lc->cu = unit;
+
+    return unit;
+}
+
+/*
+ * Allocate a coding unit through alloc_cu(), fill in its geometry and reset
+ * the per-CU state written here, then publish its position via set_cb_pos().
+ *
+ * Returns the new unit, or NULL when allocation fails.
+ */
+static CodingUnit* add_cu(VVCLocalContext *lc, const int x0, const int y0,
+    const int cb_width, const int cb_height, const int cqt_depth, const VVCTreeType tree_type)
+{
+    CodingUnit *cu = alloc_cu(lc, x0, y0);
+
+    if (!cu)
+        return NULL;
+
+    lc->parse.prev_tu_cbf_y = 0;
+
+    // geometry and tree information
+    cu->x0        = x0;
+    cu->y0        = y0;
+    cu->cb_width  = cb_width;
+    cu->cb_height = cb_height;
+    cu->cqt_depth = cqt_depth;
+    cu->tree_type = tree_type;
+    cu->ch_type   = tree_type == DUAL_TREE_CHROMA ? 1 : 0;
+
+    // defaults for the syntax elements decoded later
+    memset(&cu->pu, 0, sizeof(cu->pu));
+    cu->tus.head                = NULL;
+    cu->tus.tail                = NULL;
+    cu->bdpcm_flag[LUMA]        = 0;
+    cu->bdpcm_flag[CB]          = 0;
+    cu->bdpcm_flag[CR]          = 0;
+    cu->sbt_flag                = 0;
+    cu->act_enabled_flag        = 0;
+    cu->intra_mip_flag          = 0;
+    cu->ciip_flag               = 0;
+    cu->coded_flag              = 1;
+    cu->isp_split_type          = ISP_NO_SPLIT;
+    cu->num_intra_subpartitions = 1;
+
+    set_cb_pos(lc->fc, cu);
+    return cu;
+}
+
+/*
+ * Publish per-CU results in the frame tables: the prediction mode, the skip
+ * flag (not for the chroma tree), and for every transform block the chroma
+ * QP and the BDPCM marker.
+ */
+static void set_cu_tabs(const VVCLocalContext *lc, const CodingUnit *cu)
+{
+    const VVCFrameContext *fc = lc->fc;
+
+    set_cb_tab(lc, fc->tab.cpm[cu->ch_type], cu->pred_mode);
+    if (cu->tree_type != DUAL_TREE_CHROMA)
+        set_cb_tab(lc, fc->tab.skip, cu->skip_flag);
+
+    for (const TransformUnit *tu = cu->tus.head; tu; tu = tu->next) {
+        for (int i = 0; i < tu->nb_tbs; i++) {
+            const TransformBlock *tb = &tu->tbs[i];
+            const int c_idx          = tb->c_idx;
+
+            if (c_idx != LUMA)
+                set_qp_c_tab(lc, tu, tb);
+            // the pcmf table is only written for LUMA and CB blocks
+            if (c_idx != CR && cu->bdpcm_flag[c_idx])
+                set_tb_tab(fc->tab.pcmf[c_idx], 1, fc, tb);
+        }
+    }
+}
+
+//8.5.2.7 Derivation process for merge motion vector difference
+// Adds the MMVD offset to the motion vector(s) of mvf. For bi-prediction the
+// offset of the second list is equal to, scaled from, or mirrored from the
+// parsed offset depending on the POC distances of the two references.
+static void derive_mmvd(const VVCLocalContext *lc, MvField *mvf, const Mv *mmvd_offset)
+{
+    if (mvf->pred_flag != PF_BI) {
+        // uni-prediction: the offset goes unchanged to the single list in use
+        Mv *mv = &mvf->mv[mvf->pred_flag - PF_L0];
+
+        mv->x += mmvd_offset->x;
+        mv->y += mmvd_offset->y;
+    } else {
+        const RefPicList *rpl = lc->sc->rpl;
+        const int poc         = lc->fc->ps.ph->poc;
+        const int dist[2]     = {
+            poc - rpl[0].list[mvf->ref_idx[0]],
+            poc - rpl[1].list[mvf->ref_idx[1]],
+        };
+        Mv delta[2];
+
+        if (dist[0] == dist[1]) {
+            delta[0] = *mmvd_offset;
+            delta[1] = *mmvd_offset;
+        } else {
+            // "direct" receives the parsed offset, "derived" is computed from it
+            const int direct  = FFABS(dist[0]) < FFABS(dist[1]);
+            const int derived = !direct;
+
+            delta[direct] = *mmvd_offset;
+            if (rpl[0].isLongTerm[mvf->ref_idx[0]] || rpl[1].isLongTerm[mvf->ref_idx[1]]) {
+                // a long-term reference is involved: copy, negated when the
+                // two distances have different signs
+                const int mirror = FFSIGN(dist[0]) != FFSIGN(dist[1]);
+
+                delta[derived].x = mirror ? -delta[direct].x : delta[direct].x;
+                delta[derived].y = mirror ? -delta[direct].y : delta[direct].y;
+            } else {
+                ff_vvc_mv_scale(&delta[derived], mmvd_offset, dist[direct], dist[derived]);
+            }
+        }
+
+        for (int lx = 0; lx < 2; lx++) {
+            mvf->mv[lx].x += delta[lx].x;
+            mvf->mv[lx].y += delta[lx].y;
+        }
+    }
+}
+
+/*
+ * Copy a motion field into a MotionInfo. Motion vector (control point 0)
+ * and reference index are copied only for the lists selected by pred_flag.
+ */
+static void mvf_to_mi(const MvField *mvf, MotionInfo *mi)
+{
+    for (int lx = 0; lx < 2; lx++) {
+        const PredFlag list_bit = lx + 1;
+
+        if (!(mvf->pred_flag & list_bit))
+            continue;
+        mi->ref_idx[lx] = mvf->ref_idx[lx];
+        mi->mv[lx][0]   = mvf->mv[lx];
+    }
+
+    mi->hpel_if_idx = mvf->hpel_if_idx;
+    mi->bcw_idx     = mvf->bcw_idx;
+    mi->pred_flag   = mvf->pred_flag;
+}
+
+/*
+ * Turn a bi-predicted merge candidate into an L0-only one (with bcw_idx
+ * reset) when width + height of the block equals 12.
+ */
+static void mv_merge_refine_pred_flag(MvField *mvf, const int width, const int height)
+{
+    if (mvf->pred_flag != PF_BI || width + height != 12)
+        return;
+
+    mvf->bcw_idx   = 0;
+    mvf->pred_flag = PF_L0;
+}
+
+// subblock-based inter prediction data
+// Records merge_subblock_flag in the frame table, parses merge_subblock_idx
+// when more than one candidate is allowed, and derives the sub-block motion.
+static void merge_data_subblock(VVCLocalContext *lc)
+{
+    PredictionUnit *pu  = &lc->cu->pu;
+    const int max_cand  = lc->fc->ps.ph->max_num_subblock_merge_cand;
+    int idx;
+
+    set_cb_tab(lc, lc->fc->tab.msf, pu->merge_subblock_flag);
+
+    idx = max_cand > 1 ? ff_vvc_merge_subblock_idx(lc, max_cand) : 0;
+    ff_vvc_sb_mv_merge_mode(lc, idx, pu);
+}
+
+/*
+ * Regular merge mode, optionally with a merge motion vector difference
+ * (MMVD): parse the candidate index (and MMVD offset), derive the merged
+ * motion field, then store it and copy it into pu->mi.
+ */
+static void merge_data_regular(VVCLocalContext *lc)
+{
+    const VVCSPS *sps       = lc->fc->ps.sps;
+    const VVCPH *ph         = lc->fc->ps.ph;
+    const CodingUnit *cu    = lc->cu;
+    PredictionUnit *pu      = &lc->cu->pu;
+    const int multi_cand    = sps->max_num_merge_cand > 1;
+    int cand_idx            = 0;
+    Mv offset;
+    MvField merged;
+
+    if (sps->mmvd_enabled_flag)
+        pu->mmvd_merge_flag = ff_vvc_mmvd_merge_flag(lc);
+
+    if (pu->mmvd_merge_flag) {
+        // with MMVD the candidate index is the single mmvd_cand_flag bit
+        if (multi_cand)
+            cand_idx = ff_vvc_mmvd_cand_flag(lc);
+        ff_vvc_mmvd_offset_coding(lc, &offset, ph->mmvd_fullpel_only_flag);
+    } else if (multi_cand) {
+        cand_idx = ff_vvc_merge_idx(lc);
+    }
+
+    ff_vvc_luma_mv_merge_mode(lc, cand_idx, 0, &merged);
+    if (pu->mmvd_merge_flag)
+        derive_mmvd(lc, &merged, &offset);
+    mv_merge_refine_pred_flag(&merged, cu->cb_width, cu->cb_height);
+
+    ff_vvc_store_mvf(lc, &merged);
+    mvf_to_mi(&merged, &pu->mi);
+}
+
+/*
+ * Return ciip_flag: parsed when both CIIP and GPM are possible for this
+ * block, otherwise inferred from the same conditions that make CIIP available.
+ */
+static int ciip_flag_decode(VVCLocalContext *lc, const int ciip_avaiable, const int gpm_avaiable, const int is_128)
+{
+    const CodingUnit *cu = lc->cu;
+
+    if (ciip_avaiable && gpm_avaiable)
+        return ff_vvc_ciip_flag(lc);
+
+    if (!lc->fc->ps.sps->ciip_enabled_flag || cu->skip_flag || is_128)
+        return 0;
+    return cu->cb_width * cu->cb_height >= 64;
+}
+
+/*
+ * Geometric partitioning merge mode: parse the partition index and the two
+ * candidate indices, then derive and store the two motion vectors.
+ */
+static void merge_data_gpm(VVCLocalContext *lc)
+{
+    PredictionUnit *pu  = &lc->cu->pu;
+    const int max_cand  = lc->fc->ps.sps->max_num_gpm_merge_cand;
+    int cand_idx[2]     = { 0, 0 };
+
+    pu->merge_gpm_flag    = 1;
+    pu->gpm_partition_idx = ff_vvc_merge_gpm_partition_idx(lc);
+
+    cand_idx[0] = ff_vvc_merge_gpm_idx(lc, 0);
+    // the second index is only coded when more than two candidates exist
+    if (max_cand > 2)
+        cand_idx[1] = ff_vvc_merge_gpm_idx(lc, 1);
+
+    ff_vvc_luma_mv_merge_gpm(lc, cand_idx, pu->gpm_mv);
+    ff_vvc_store_gpm_mvf(lc, pu);
+}
+
+/*
+ * Combined inter/intra prediction merge mode: derive the inter part like a
+ * regular merge and set the intra part to planar prediction.
+ */
+static void merge_data_ciip(VVCLocalContext *lc)
+{
+    CodingUnit *cu  = lc->cu;
+    int cand_idx    = 0;
+    MvField merged;
+
+    if (lc->fc->ps.sps->max_num_merge_cand > 1)
+        cand_idx = ff_vvc_merge_idx(lc);
+
+    ff_vvc_luma_mv_merge_mode(lc, cand_idx, 1, &merged);
+    mv_merge_refine_pred_flag(&merged, cu->cb_width, cu->cb_height);
+    ff_vvc_store_mvf(lc, &merged);
+    mvf_to_mi(&merged, &cu->pu.mi);
+
+    // intra half of CIIP: planar, reference line 0, no MIP
+    cu->intra_pred_mode_c   = INTRA_PLANAR;
+    cu->intra_pred_mode_y   = INTRA_PLANAR;
+    cu->intra_mip_flag      = 0;
+    cu->intra_luma_ref_idx  = 0;
+}
+
+// block-based inter prediction data
+// Chooses between regular, CIIP and GPM merge. regular_merge_flag is parsed
+// only when the block has no 128-sample side and CIIP or GPM is possible.
+static void merge_data_block(VVCLocalContext *lc)
+{
+    const VVCSPS *sps   = lc->fc->ps.sps;
+    const VVCSH *sh     = &lc->sc->sh;
+    CodingUnit *cu      = lc->cu;
+    const int w         = cu->cb_width;
+    const int h         = cu->cb_height;
+    const int is_128    = w == 128 || h == 128;
+    const int ciip_ok   = sps->ciip_enabled_flag && !cu->skip_flag && (w * h >= 64);
+    const int gpm_ok    = sps->gpm_enabled_flag && IS_B(sh) &&
+                          (w >= 8) && (h >= 8) && (w < 8 * h) && (h < 8 * w);
+
+    // short-circuit keeps the flag unparsed whenever regular merge is the only option
+    if (is_128 || !(ciip_ok || gpm_ok) || ff_vvc_regular_merge_flag(lc, cu->skip_flag)) {
+        merge_data_regular(lc);
+        return;
+    }
+
+    cu->ciip_flag = ciip_flag_decode(lc, ciip_ok, gpm_ok, is_128);
+    if (cu->ciip_flag)
+        merge_data_ciip(lc);
+    else
+        merge_data_gpm(lc);
+}
+
+/*
+ * Parse merge data for the current coding unit and dispatch to the
+ * sub-block or block based merge parser.
+ *
+ * Returns 0 on success, AVERROR_PATCHWELCOME for IBC coding units, which
+ * are not implemented.
+ */
+static int hls_merge_data(VVCLocalContext *lc)
+{
+    const VVCPH *ph         = lc->fc->ps.ph;
+    const CodingUnit *cu    = lc->cu;
+    PredictionUnit *pu      = &lc->cu->pu;
+
+    pu->merge_gpm_flag  = 0;
+    pu->mi.num_sb_y     = 1;
+    pu->mi.num_sb_x     = 1;
+
+    if (cu->pred_mode == MODE_IBC) {
+        avpriv_report_missing_feature(lc->fc->avctx, "Intra Block Copy");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // the sub-block merge flag is only coded for blocks of at least 8x8
+    if (ph->max_num_subblock_merge_cand > 0 && cu->cb_width >= 8 && cu->cb_height >= 8)
+        pu->merge_subblock_flag = ff_vvc_merge_subblock_flag(lc);
+
+    if (pu->merge_subblock_flag)
+        merge_data_subblock(lc);
+    else
+        merge_data_block(lc);
+
+    return 0;
+}
+
+/*
+ * Parse one motion vector difference into *mvd.
+ *
+ * The syntax elements are interleaved per component: both greater0 flags
+ * first, then both greater1 flags, then magnitude remainder and sign for x
+ * followed by y.
+ *
+ * The value is accumulated in 32 bits: the remainder from
+ * ff_vvc_abs_mvd_minus2() is added to 2, and a 16-bit accumulator would
+ * truncate large differences before they reach the Mv components.
+ */
+static void hls_mvd_coding(VVCLocalContext *lc, Mv* mvd)
+{
+    int32_t mv[2];
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        mv[i] = ff_vvc_abs_mvd_greater0_flag(lc);
+    }
+    for (i = 0; i < 2; i++) {
+        if (mv[i])
+            mv[i] += ff_vvc_abs_mvd_greater1_flag(lc);
+    }
+    for (i = 0; i < 2; i++) {
+        if (mv[i] > 0) {
+            // mv[i] == 2 means "greater than 1": the remainder follows
+            if (mv[i] == 2)
+                mv[i] += ff_vvc_abs_mvd_minus2(lc);
+            // sign flag 1 selects the negative value
+            mv[i] = (1 - 2 * ff_vvc_mvd_sign_flag(lc)) * mv[i];
+        }
+    }
+    mvd->x = mv[0];
+    mvd->y = mv[1];
+}
+
+/*
+ * Parse bcw_idx when bi-prediction with CU-level weights is allowed for
+ * this block: BCW enabled, PF_BI, no explicit weighted prediction on either
+ * reference, and at least 256 luma samples. Returns 0 otherwise.
+ */
+static int bcw_idx_decode(VVCLocalContext *lc, const MotionInfo *mi, const int cb_width, const int cb_height)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCPH *ph             = fc->ps.ph;
+    const VVCSH *sh             = &lc->sc->sh;
+    // the weight table lives in the picture header or in the slice header
+    const PredWeightTable *w    = fc->ps.pps->wp_info_in_ph_flag ? &ph->pwt : &sh->pwt;
+
+    if (!fc->ps.sps->bcw_enabled_flag || mi->pred_flag != PF_BI)
+        return 0;
+
+    if (w->weight_flag[L0][LUMA][mi->ref_idx[0]]   ||
+        w->weight_flag[L1][LUMA][mi->ref_idx[1]]   ||
+        w->weight_flag[L0][CHROMA][mi->ref_idx[0]] ||
+        w->weight_flag[L1][CHROMA][mi->ref_idx[1]])
+        return 0;
+
+    if (cb_width * cb_height < 256)
+        return 0;
+
+    return ff_vvc_bcw_idx(lc, ff_vvc_no_backward_pred_flag(lc));
+}
+
+/*
+ * Return the reference index for list lx: taken from sh->ref_idx_sym[] in
+ * symmetric MVD mode, parsed when the list has more than one entry, and 0
+ * otherwise.
+ */
+static int8_t ref_idx_decode(VVCLocalContext *lc, const VVCSH *sh, const int sym_mvd_flag, const int lx)
+{
+    if (sym_mvd_flag)
+        return sh->ref_idx_sym[lx];
+    if (sh->nb_refs[lx] > 1)
+        return ff_vvc_ref_idx_lx(lc, sh->nb_refs[lx]);
+    return 0;
+}
+
+/*
+ * Fill mvds[lx][0..num_cp_mv-1] with the motion vector differences of list lx.
+ *
+ * Differences for control points 1.. are coded relative to control point 0
+ * and are converted to absolute differences here.
+ *
+ * Returns non-zero when at least one resulting difference is non-zero.
+ */
+static int mvds_decode(VVCLocalContext *lc, Mv mvds[2][MAX_CONTROL_POINTS],
+    const int num_cp_mv, const int lx)
+{
+    const PredictionUnit *pu    = &lc->cu->pu;
+    Mv *cp                      = mvds[lx];
+    int nonzero;
+
+    // L1 differences are forced to zero for bi-prediction when mvd_l1_zero_flag is set
+    if (lx == L1 && lc->fc->ps.ph->mvd_l1_zero_flag && pu->mi.pred_flag == PF_BI) {
+        for (int j = 0; j < num_cp_mv; j++)
+            AV_ZERO64(&cp[j]);
+        return 0;
+    }
+
+    if (lx == L1 && pu->sym_mvd_flag) {
+        // symmetric MVD: L1 mirrors the L0 difference, nothing is parsed
+        cp[0].x = -mvds[L0][0].x;
+        cp[0].y = -mvds[L0][0].y;
+    } else {
+        hls_mvd_coding(lc, &cp[0]);
+    }
+    nonzero = cp[0].x || cp[0].y;
+
+    for (int j = 1; j < num_cp_mv; j++) {
+        hls_mvd_coding(lc, &cp[j]);
+        cp[j].x += cp[0].x;
+        cp[j].y += cp[0].y;
+        nonzero |= cp[j].x || cp[j].y;
+    }
+
+    return nonzero;
+}
+
+/*
+ * Add the decoded motion vector differences, scaled by 2^amvr_shift, to the
+ * predictors in mi for every list selected by mi->pred_flag.
+ *
+ * The scaling is a multiplication rather than a left shift because the
+ * differences can be negative and left-shifting a negative value is
+ * undefined behaviour in C.
+ */
+static void mvp_add_difference(MotionInfo *mi, const int num_cp_mv,
+    const Mv mvds[2][MAX_CONTROL_POINTS], const int amvr_shift)
+{
+    const int scale = 1 << amvr_shift;
+
+    for (int i = 0; i < 2; i++) {
+        const PredFlag mask = i + PF_L0;
+        if (mi->pred_flag & mask) {
+            for (int j = 0; j < num_cp_mv; j++) {
+                const Mv *mvd = &mvds[i][j];
+                mi->mv[i][j].x += mvd->x * scale;
+                mi->mv[i][j].y += mvd->y * scale;
+            }
+        }
+    }
+}
+
+/*
+ * Parse the non-merge (AMVP) inter prediction data of the current coding
+ * unit: prediction direction, affine flags, symmetric MVD flag, reference
+ * indices, MVDs, predictor flags, AMVR shift and BCW index. The motion
+ * vectors are then derived from predictor plus difference and stored.
+ *
+ * Always returns 0.
+ */
+static int mvp_data(VVCLocalContext *lc)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const CodingUnit *cu        = lc->cu;
+    PredictionUnit *pu          = &lc->cu->pu;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPH *ph             = fc->ps.ph;
+    const VVCSH *sh             = &lc->sc->sh;
+    MotionInfo *mi              = &pu->mi;
+    const int cb_width          = cu->cb_width;
+    const int cb_height         = cu->cb_height;
+
+    int mvp_lx_flag[2] = {0};
+    int cu_affine_type_flag = 0;
+    int num_cp_mv;
+    int amvr_enabled, has_no_zero_mvd = 0, amvr_shift;
+    Mv mvds[2][MAX_CONTROL_POINTS];
+
+    mi->pred_flag = ff_vvc_pred_flag(lc, IS_B(sh));
+    // the affine flags are only coded for blocks of at least 16x16
+    if (sps->affine_enabled_flag && cb_width >= 16 && cb_height >= 16) {
+        pu->inter_affine_flag = ff_vvc_inter_affine_flag(lc);
+        set_cb_tab(lc, fc->tab.iaf, pu->inter_affine_flag);
+        if (sps->six_param_affine_enabled_flag && pu->inter_affine_flag)
+            cu_affine_type_flag = ff_vvc_cu_affine_type_flag(lc);
+    }
+    // motion_model_idc is the sum of the two affine flags; one more control point than that
+    mi->motion_model_idc = pu->inter_affine_flag + cu_affine_type_flag;
+    num_cp_mv = mi->motion_model_idc + 1;
+
+    if (sps->smvd_enabled_flag && !ph->mvd_l1_zero_flag &&
+        mi->pred_flag == PF_BI && !pu->inter_affine_flag &&
+        sh->ref_idx_sym[0] > -1 && sh->ref_idx_sym[1] > -1)
+        pu->sym_mvd_flag = ff_vvc_sym_mvd_flag(lc);
+
+    for (int i = L0; i <= L1; i++) {
+        // list i is skipped when prediction uses only the other list
+        const PredFlag pred_flag = PF_L0 + !i;
+        if (mi->pred_flag != pred_flag) {
+            mi->ref_idx[i] = ref_idx_decode(lc, sh, pu->sym_mvd_flag, i);
+            has_no_zero_mvd |= mvds_decode(lc, mvds, num_cp_mv, i);
+            mvp_lx_flag[i] = ff_vvc_mvp_lx_flag(lc);
+        }
+    }
+
+    // AMVR is only enabled when at least one decoded difference is non-zero
+    amvr_enabled = mi->motion_model_idc == MOTION_TRANSLATION ?
+        sps->amvr_enabled_flag : sps->affine_amvr_enabled_flag;
+    amvr_enabled &= has_no_zero_mvd;
+
+    amvr_shift = ff_vvc_amvr_shift(lc, pu->inter_affine_flag, cu->pred_mode, amvr_enabled);
+
+    mi->hpel_if_idx = amvr_shift == 3;
+    mi->bcw_idx = bcw_idx_decode(lc, mi, cb_width, cb_height);
+
+    // predictors first, then the decoded differences on top
+    if (mi->motion_model_idc)
+        ff_vvc_affine_mvp(lc, mvp_lx_flag, amvr_shift, mi);
+    else
+        ff_vvc_mvp(lc, mvp_lx_flag, amvr_shift, mi);
+
+    mvp_add_difference(mi, num_cp_mv, mvds, amvr_shift);
+
+    if (mi->motion_model_idc)
+        ff_vvc_store_sb_mvs(lc, pu);
+    else
+        ff_vvc_store_mv(lc, &pu->mi);
+
+    return 0;
+}
+
+// derive bdofFlag from 8.5.6 Decoding process for inter blocks
+// derive dmvr from 8.5.1 General decoding process for coding units coded in inter prediction mode
+// Both flags are cleared first and only set when the shared conditions below
+// hold together with the tool-specific ones.
+static void derive_dmvr_bdof_flag(const VVCLocalContext *lc, PredictionUnit *pu)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCPPS *pps           = fc->ps.pps;
+    const VVCPH *ph             = fc->ps.ph;
+    const VVCSH *sh             = &lc->sc->sh;
+    const int poc               = ph->poc;
+    const RefPicList *rpl0      = lc->sc->rpl + L0;
+    const RefPicList *rpl1      = lc->sc->rpl + L1;
+    const int8_t *ref_idx       = pu->mi.ref_idx;
+    const MotionInfo *mi        = &pu->mi;
+    const CodingUnit *cu        = lc->cu;
+    // the weight table lives in the picture header or in the slice header
+    const PredWeightTable *w    = pps->wp_info_in_ph_flag ? &fc->ps.ph->pwt : &sh->pwt;
+
+    pu->dmvr_flag = 0;
+    pu->bdof_flag = 0;
+
+    // shared conditions: bi-prediction, equal POC distance on both sides of the
+    // current picture, no long-term reference, no CIIP, no BCW, no explicit
+    // weights, and a block of at least 8x8 with 128 or more luma samples
+    if (mi->pred_flag == PF_BI &&
+        (poc - rpl0->list[ref_idx[L0]] == rpl1->list[ref_idx[L1]] - poc) &&
+        !rpl0->isLongTerm[ref_idx[L0]] && !rpl1->isLongTerm[ref_idx[L1]] &&
+        !cu->ciip_flag &&
+        !mi->bcw_idx &&
+        !w->weight_flag[L0][LUMA][mi->ref_idx[L0]] && !w->weight_flag[L1][LUMA][mi->ref_idx[L1]] &&
+        !w->weight_flag[L0][CHROMA][mi->ref_idx[L0]] && !w->weight_flag[L1][CHROMA][mi->ref_idx[L1]] &&
+        cu->cb_width >= 8 && cu->cb_height >= 8 &&
+        (cu->cb_width * cu->cb_height >= 128)) {
+        // fixme: for RprConstraintsActiveFlag
+        // BDOF: translational, not sub-block merge, not symmetric MVD
+        if (!ph->bdof_disabled_flag &&
+            mi->motion_model_idc == MOTION_TRANSLATION &&
+            !pu->merge_subblock_flag &&
+            !pu->sym_mvd_flag)
+            pu->bdof_flag = 1;
+        // DMVR: merge mode without MMVD
+        if (!ph->dmvr_disabled_flag &&
+            pu->general_merge_flag &&
+            !pu->mmvd_merge_flag)
+            pu->dmvr_flag = 1;
+    }
+}
+
+// Part of 8.5.1 General decoding process for coding units coded in inter prediction mode:
+// when DMVR or BDOF is enabled for the PU, set the subblock counts so that a
+// dimension larger than 16 is covered by 16-sample-wide subblocks.
+static void refine_regular_subblock(const VVCLocalContext *lc)
+{
+    const CodingUnit *cu    = lc->cu;
+    PredictionUnit *pu      = &lc->cu->pu;
+
+    derive_dmvr_bdof_flag(lc, pu);
+    if (!pu->dmvr_flag && !pu->bdof_flag)
+        return;
+
+    pu->mi.num_sb_x = (cu->cb_width  <= 16) ? 1 : (cu->cb_width  >> 4);
+    pu->mi.num_sb_y = (cu->cb_height <= 16) ? 1 : (cu->cb_height >> 4);
+}
+
+/**
+ * Parse the inter prediction data of the current coding unit (lc->cu).
+ *
+ * general_merge_flag is inferred to be 1 for skipped CUs and parsed otherwise.
+ * Merge CUs go through hls_merge_data(), everything else through mvp_data().
+ * For regular (non-GPM, non-affine, non-subblock-merge) PUs the DMVR/BDOF
+ * subblock layout is derived and the HMVP table is updated.
+ *
+ * @param lc local context
+ * @return the non-negative result of mvp_data() (0 for merge CUs) on success,
+ *         AVERROR_PATCHWELCOME for non-merge IBC CUs, or the negative error
+ *         returned by mvp_data()
+ */
+static int vvc_inter_data(VVCLocalContext *lc)
+{
+    const CodingUnit *cu    = lc->cu;
+    PredictionUnit *pu      = &lc->cu->pu;
+    const MotionInfo *mi    = &pu->mi;
+    int ret                 = 0;
+
+    pu->general_merge_flag = 1;
+    if (!cu->skip_flag)
+        pu->general_merge_flag = ff_vvc_general_merge_flag(lc);
+
+    if (pu->general_merge_flag) {
+        hls_merge_data(lc);
+    } else if (cu->pred_mode == MODE_IBC){
+        avpriv_report_missing_feature(lc->fc->avctx, "Intra Block Copy");
+        return AVERROR_PATCHWELCOME;
+    } else {
+        ret = mvp_data(lc);
+        // do not refine subblocks or feed the HMVP table from a PU that failed to parse
+        if (ret < 0)
+            return ret;
+    }
+    if (!pu->merge_gpm_flag && !pu->inter_affine_flag && !pu->merge_subblock_flag) {
+        refine_regular_subblock(lc);
+        ff_vvc_update_hmvp(lc, mi);
+    }
+    return ret;
+}
+
+/**
+ * Parse one coding unit: prediction mode, intra/inter prediction data and,
+ * when the CU is coded, its transform tree.
+ *
+ * @param lc          local context
+ * @param x0          x position of the coding block
+ * @param y0          y position of the coding block
+ * @param cb_width    coding block width
+ * @param cb_height   coding block height
+ * @param cqt_depth   quadtree depth, forwarded to add_cu()
+ * @param tree_type   tree the CU belongs to (single, dual luma or dual chroma)
+ * @param mode_type   prediction mode restriction inherited from the coding tree
+ * @return 0 on success, a negative AVERROR code otherwise: AVERROR(ENOMEM) when
+ *         add_cu() fails, AVERROR_PATCHWELCOME for palette and adaptive color
+ *         transform, or the error of the inter/transform parsers
+ */
+static int hls_coding_unit(VVCLocalContext *lc, int x0, int y0, int cb_width, int cb_height,
+    int cqt_depth, const VVCTreeType tree_type, VVCModeType mode_type)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCSH *sh             = &lc->sc->sh;
+    const int hs                = sps->hshift[CHROMA];
+    const int vs                = sps->vshift[CHROMA];
+    const int is_128            = cb_width > 64 || cb_height > 64;
+    int pred_mode_plt_flag = 0;
+    int ret;
+
+    CodingUnit *cu = add_cu(lc, x0, y0, cb_width, cb_height, cqt_depth, tree_type);
+
+    if (!cu)
+        return AVERROR(ENOMEM);
+
+    ff_vvc_set_neighbour_available(lc, cu->x0, cu->y0, cu->cb_width, cu->cb_height);
+
+    // blocks wider or taller than 64 in I slices are restricted to intra
+    if (IS_I(sh) && is_128)
+        mode_type = MODE_TYPE_INTRA;
+    cu->pred_mode = pred_mode_decode(lc, tree_type, mode_type);
+
+    if (cu->pred_mode == MODE_INTRA && sps->palette_enabled_flag && !is_128 && !cu->skip_flag &&
+        mode_type != MODE_TYPE_INTER && ((cb_width * cb_height) >
+        (tree_type != DUAL_TREE_CHROMA ? 16 : (16 << hs << vs))) &&
+        (mode_type != MODE_TYPE_INTRA || tree_type != DUAL_TREE_CHROMA)) {
+        pred_mode_plt_flag = ff_vvc_pred_mode_plt_flag(lc);
+        if (pred_mode_plt_flag) {
+            avpriv_report_missing_feature(lc->fc->avctx, "Palette");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+    if (cu->pred_mode == MODE_INTRA && sps->act_enabled_flag && tree_type == SINGLE_TREE) {
+        avpriv_report_missing_feature(fc->avctx, "Adaptive Color Transform");
+        return AVERROR_PATCHWELCOME;
+    }
+    if (cu->pred_mode == MODE_INTRA || cu->pred_mode == MODE_PLT) {
+        if (tree_type == SINGLE_TREE || tree_type == DUAL_TREE_LUMA) {
+            if (pred_mode_plt_flag) {
+                avpriv_report_missing_feature(lc->fc->avctx, "Palette");
+                return AVERROR_PATCHWELCOME;
+            } else {
+                intra_luma_pred_modes(lc);
+            }
+            ff_vvc_set_intra_mvf(lc);
+        }
+        if ((tree_type == SINGLE_TREE || tree_type == DUAL_TREE_CHROMA) && sps->chroma_format_idc) {
+            if (pred_mode_plt_flag && tree_type == DUAL_TREE_CHROMA) {
+                avpriv_report_missing_feature(lc->fc->avctx, "Palette");
+                return AVERROR_PATCHWELCOME;
+            } else if (!pred_mode_plt_flag) {
+                if (!cu->act_enabled_flag)
+                    intra_chroma_pred_modes(lc);
+            }
+        }
+    } else if (tree_type != DUAL_TREE_CHROMA) { /* MODE_INTER or MODE_IBC */
+        if ((ret = vvc_inter_data(lc)) < 0)
+            return ret;
+    }
+    // cu_coded_flag is parsed only for non-intra, non-palette, non-merge CUs;
+    // otherwise it is inferred from the skip and palette flags
+    if (cu->pred_mode != MODE_INTRA && !pred_mode_plt_flag && !lc->cu->pu.general_merge_flag)
+        cu->coded_flag = ff_vvc_cu_coded_flag(lc);
+    else
+        cu->coded_flag = !(cu->skip_flag || pred_mode_plt_flag);
+
+    if (cu->coded_flag) {
+        sbt_info(lc, sps);
+        if (sps->act_enabled_flag && cu->pred_mode != MODE_INTRA && tree_type == SINGLE_TREE) {
+            avpriv_report_missing_feature(fc->avctx, "Adaptive Color Transform");
+            return AVERROR_PATCHWELCOME;
+        }
+        // reset the per-CU LFNST/MTS tracking state before the transform tree is parsed
+        lc->parse.lfnst_dc_only = 1;
+        lc->parse.lfnst_zero_out_sig_coeff_flag = 1;
+        lc->parse.mts_dc_only = 1;
+        lc->parse.mts_zero_out_sig_coeff_flag = 1;
+        ret = hls_transform_tree(lc, x0, y0, cb_width, cb_height, cu->ch_type);
+        if (ret < 0)
+            return ret;
+        cu->lfnst_idx = lfnst_idx_decode(lc);
+        cu->mts_idx = mts_idx_decode(lc);
+        set_qp_c(lc);
+    } else {
+        av_assert0(tree_type == SINGLE_TREE);
+        ret = skipped_transform_tree_unit(lc);
+        if (ret < 0)
+            return ret;
+    }
+    set_cu_tabs(lc, cu);
+
+    return 0;
+}
+
+// Derive modeTypeCondition for a split of a cb_width x cb_height block:
+//   0 - the children keep the current mode type
+//   1 - the children are restricted to intra
+//   2 - a flag in the bitstream selects intra or inter (see mode_type_decode())
+static int derive_mode_type_condition(const VVCLocalContext *lc,
+    const VVCSplitMode split, const int cb_width, const int cb_height, const VVCModeType mode_type_curr)
+{
+    const VVCSH *sh     = &lc->sc->sh;
+    const VVCSPS *sps   = lc->fc->ps.sps;
+    const int area      = cb_width * cb_height;
+    const int is_420    = sps->chroma_format_idc == CHROMA_FORMAT_420;
+    const int is_bt     = split == SPLIT_BT_HOR || split == SPLIT_BT_VER;
+    const int is_tt     = split == SPLIT_TT_HOR || split == SPLIT_TT_VER;
+
+    // cases in which no restriction can be introduced at all
+    if (IS_I(sh) && sps->qtbtt_dual_tree_intra_flag)
+        return 0;
+    if (mode_type_curr != MODE_TYPE_ALL)
+        return 0;
+    if (!sps->chroma_format_idc || sps->chroma_format_idc == CHROMA_FORMAT_444)
+        return 0;
+
+    // unconditional intra restriction
+    if (area == 64 && (split == SPLIT_QT || is_tt))
+        return 1;
+    if (area == 32 && is_bt)
+        return 1;
+
+    // intra for I slices, signalled for other slice types
+    if ((is_420 && ((area == 64 && is_bt) || (area == 128 && is_tt))) ||
+        (cb_width == 8 && split == SPLIT_BT_VER) ||
+        (cb_width == 16 && split == SPLIT_TT_VER))
+        return IS_I(sh) ? 1 : 2;
+
+    return 0;
+}
+
+// Determine the mode type of the children of a split block. The non-inter flag
+// is read from the bitstream only when the derived condition is 2.
+static VVCModeType mode_type_decode(VVCLocalContext *lc, const int x0, const int y0,
+    const int cb_width, const int cb_height, const VVCSplitMode split, const int ch_type,
+    const VVCModeType mode_type_curr)
+{
+    switch (derive_mode_type_condition(lc, split, cb_width, cb_height, mode_type_curr)) {
+    case 1:
+        return MODE_TYPE_INTRA;
+    case 2:
+        return ff_vvc_non_inter_flag(lc, x0, y0, ch_type) ? MODE_TYPE_INTRA : MODE_TYPE_INTER;
+    default:
+        return mode_type_curr;
+    }
+}
+
+// Forward declaration: the coding_tree_*() split helpers below call back into
+// hls_coding_tree() for every child block.
+static int hls_coding_tree(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset, int part_idx,
+    VVCSplitMode last_split_mode, VVCTreeType tree_type_curr, VVCModeType mode_type_curr);
+
+// Vertical binary split: parse the left half and, if it starts inside
+// pps->width, the right half. depth_offset grows by one when the block
+// extends beyond pps->width.
+// Returns 0 on success or the first negative error from hls_coding_tree().
+static int coding_tree_btv(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type)
+{
+// no trailing ';' after while (0): the call site supplies it
+#define CODING_TREE(x, idx) do { \
+        ret = hls_coding_tree(lc, x, y0, cb_width / 2, cb_height, \
+            qg_on_y, qg_on_c, cb_sub_div + 1, cqt_depth, mtt_depth + 1, \
+            depth_offset, idx, SPLIT_BT_VER, tree_type, mode_type); \
+        if (ret < 0) \
+            return ret; \
+    } while (0)
+
+    const VVCPPS *pps = lc->fc->ps.pps;
+    const int x1 = x0 + cb_width / 2;
+    int ret = 0;
+
+    depth_offset += (x0 + cb_width > pps->width) ? 1 : 0;
+    CODING_TREE(x0, 0);
+    if (x1 < pps->width)
+        CODING_TREE(x1, 1);
+
+    return 0;
+
+#undef CODING_TREE
+}
+
+// Horizontal binary split: parse the upper half and, if it starts inside
+// pps->height, the lower half. depth_offset grows by one when the block
+// extends beyond pps->height.
+// Returns 0 on success or the first negative error from hls_coding_tree().
+static int coding_tree_bth(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type)
+{
+// no trailing ';' after while (0): the call site supplies it
+#define CODING_TREE(y, idx) do { \
+        ret = hls_coding_tree(lc, x0, y, cb_width , cb_height / 2, \
+            qg_on_y, qg_on_c, cb_sub_div + 1, cqt_depth, mtt_depth + 1, \
+            depth_offset, idx, SPLIT_BT_HOR, tree_type, mode_type); \
+        if (ret < 0) \
+            return ret; \
+    } while (0)
+
+    const VVCPPS *pps = lc->fc->ps.pps;
+    const int y1 = y0 + (cb_height / 2);
+    int ret = 0;
+
+    depth_offset += (y0 + cb_height > pps->height) ? 1 : 0;
+    CODING_TREE(y0, 0);
+    if (y1 < pps->height)
+        CODING_TREE(y1, 1);
+
+    return 0;
+
+#undef CODING_TREE
+}
+
+// Vertical ternary split: parse three children of width 1/4, 1/2 and 1/4 of
+// cb_width. The outer children are two subdivision levels deeper, the middle
+// one a single level. The quantization-group flags are cleared once
+// cb_sub_div + 2 exceeds the corresponding slice-level subdiv limit.
+// Returns 0 on success or the first negative error from hls_coding_tree().
+static int coding_tree_ttv(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type)
+{
+// no trailing ';' after while (0): the call site supplies it
+#define CODING_TREE(x, w, sub_div, idx) do { \
+        ret = hls_coding_tree(lc, x, y0, w, cb_height, \
+            qg_on_y, qg_on_c, sub_div, cqt_depth, mtt_depth + 1, \
+            depth_offset, idx, SPLIT_TT_VER, tree_type, mode_type); \
+        if (ret < 0) \
+            return ret; \
+    } while (0)
+
+    const VVCSH *sh = &lc->sc->sh;
+    const int x1    = x0 + cb_width / 4;
+    const int x2    = x0 + cb_width * 3 / 4;
+    int ret;
+
+    qg_on_y = qg_on_y && (cb_sub_div + 2 <= sh->cu_qp_delta_subdiv);
+    qg_on_c = qg_on_c && (cb_sub_div + 2 <= sh->cu_chroma_qp_offset_subdiv);
+
+    CODING_TREE(x0, cb_width / 4, cb_sub_div + 2, 0);
+    CODING_TREE(x1, cb_width / 2, cb_sub_div + 1, 1);
+    CODING_TREE(x2, cb_width / 4, cb_sub_div + 2, 2);
+
+    return 0;
+
+#undef CODING_TREE
+}
+
+// Horizontal ternary split: parse three children of height 1/4, 1/2 and 1/4
+// of cb_height. The outer children are two subdivision levels deeper, the
+// middle one a single level. The quantization-group flags are cleared once
+// cb_sub_div + 2 exceeds the corresponding slice-level subdiv limit.
+// Returns 0 on success or the first negative error from hls_coding_tree().
+static int coding_tree_tth(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type)
+{
+// no trailing ';' after while (0): the call site supplies it
+#define CODING_TREE(y, h, sub_div, idx) do { \
+        ret = hls_coding_tree(lc, x0, y, cb_width, h, \
+            qg_on_y, qg_on_c, sub_div, cqt_depth, mtt_depth + 1, \
+            depth_offset, idx, SPLIT_TT_HOR, tree_type, mode_type); \
+        if (ret < 0) \
+            return ret; \
+    } while (0)
+
+    const VVCSH *sh = &lc->sc->sh;
+    const int y1    = y0 + (cb_height / 4);
+    const int y2    = y0 + (3 * cb_height / 4);
+    int ret;
+
+    qg_on_y = qg_on_y && (cb_sub_div + 2 <= sh->cu_qp_delta_subdiv);
+    qg_on_c = qg_on_c && (cb_sub_div + 2 <= sh->cu_chroma_qp_offset_subdiv);
+
+    CODING_TREE(y0, cb_height / 4, cb_sub_div + 2, 0);
+    CODING_TREE(y1, cb_height / 2, cb_sub_div + 1, 1);
+    CODING_TREE(y2, cb_height / 4, cb_sub_div + 2, 2);
+
+    return 0;
+
+#undef CODING_TREE
+}
+
+// Quadtree split: parse up to four quadrants; a quadrant is skipped when its
+// origin is at or beyond pps->width / pps->height. Children restart with
+// mtt_depth 0 and depth_offset 0 and are one quadtree level deeper.
+// Returns 0 on success or the first negative error from hls_coding_tree().
+static int coding_tree_qt(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type)
+{
+// no trailing ';' after while (0): the call site supplies it
+#define CODING_TREE(x, y, idx) do { \
+        ret = hls_coding_tree(lc, x, y, cb_width / 2, cb_height / 2, \
+            qg_on_y, qg_on_c, cb_sub_div + 2, cqt_depth + 1, 0, 0, \
+            idx, SPLIT_QT, tree_type, mode_type); \
+        if (ret < 0) \
+            return ret; \
+    } while (0)
+
+    const VVCPPS *pps = lc->fc->ps.pps;
+    const int x1 = x0 + cb_width / 2;
+    const int y1 = y0 + cb_height / 2;
+    int ret = 0;
+
+    CODING_TREE(x0, y0, 0);
+    if (x1 < pps->width)
+        CODING_TREE(x1, y0, 1);
+    if (y1 < pps->height)
+        CODING_TREE(x0, y1, 2);
+    if (x1 < pps->width &&
+        y1 < pps->height)
+        CODING_TREE(x1, y1, 3);
+
+    return 0;
+
+#undef CODING_TREE
+}
+
+// Common signature of the per-split-mode helpers (coding_tree_btv() and friends)
+// so that they can be stored in a dispatch table.
+typedef int (*coding_tree_fn)(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset,
+    VVCTreeType tree_type, VVCModeType mode_type);
+
+// Split helpers indexed by (split mode - 1).
+// NOTE(review): the order must match the numeric values of VVCSplitMode, which
+// is declared elsewhere; presumably SPLIT_NONE is 0 and has no entry - confirm.
+static const coding_tree_fn coding_tree[] = {
+    coding_tree_tth,
+    coding_tree_bth,
+    coding_tree_ttv,
+    coding_tree_btv,
+    coding_tree_qt,
+};
+
+/**
+ * Parse one node of the coding tree: either split it further through one of
+ * the coding_tree_*() helpers or parse it as a coding unit.
+ *
+ * @param qg_on_y, qg_on_c  whether a luma / chroma quantization group may
+ *                          start at this node
+ * @param cb_sub_div        subdivision level compared against the slice-level
+ *                          cu_qp_delta_subdiv / cu_chroma_qp_offset_subdiv
+ * @param cqt_depth         quadtree depth
+ * @param mtt_depth         multi-type-tree depth
+ * @param part_idx          index of this block within the parent's split
+ * @param last_split_mode   split mode that produced this block
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int hls_coding_tree(VVCLocalContext *lc,
+    int x0, int y0, int cb_width, int cb_height, int qg_on_y, int qg_on_c,
+    int cb_sub_div, int cqt_depth, int mtt_depth, int depth_offset, int part_idx,
+    VVCSplitMode last_split_mode, VVCTreeType tree_type_curr, VVCModeType mode_type_curr)
+{
+    VVCFrameContext *fc = lc->fc;
+    const VVCPPS *pps   = fc->ps.pps;
+    const VVCSH *sh     = &lc->sc->sh;
+    const int ch_type   = tree_type_curr == DUAL_TREE_CHROMA;
+    int ret;
+    VVCAllowedSplit allowed;
+
+    // a luma quantization group starts here: reset the delta-QP state and remember its origin
+    if (pps->cu_qp_delta_enabled_flag && qg_on_y && cb_sub_div <= sh->cu_qp_delta_subdiv) {
+        lc->parse.is_cu_qp_delta_coded = 0;
+        lc->parse.cu_qg_top_left_x = x0;
+        lc->parse.cu_qg_top_left_y = y0;
+    }
+    // a chroma QP offset group starts here: reset the coded flag and the offsets
+    if (sh->cu_chroma_qp_offset_enabled_flag && qg_on_c &&
+        cb_sub_div <= sh->cu_chroma_qp_offset_subdiv) {
+        lc->parse.is_cu_chroma_qp_offset_coded = 0;
+        memset(lc->parse.chroma_qp_offset, 0, sizeof(lc->parse.chroma_qp_offset));
+    }
+
+    can_split(lc, x0, y0, cb_width, cb_height, mtt_depth, depth_offset, part_idx,
+        last_split_mode, tree_type_curr, mode_type_curr, &allowed);
+    if (ff_vvc_split_cu_flag(lc, x0, y0, cb_width, cb_height, ch_type, &allowed)) {
+        VVCSplitMode split      = ff_vvc_split_mode(lc, x0, y0, cb_width, cb_height, cqt_depth, mtt_depth, ch_type, &allowed);
+        VVCModeType mode_type   = mode_type_decode(lc, x0, y0, cb_width, cb_height, split, ch_type, mode_type_curr);
+
+        // an intra-only subtree is parsed as a luma tree; its chroma is parsed separately below
+        VVCTreeType tree_type   = (mode_type == MODE_TYPE_INTRA) ? DUAL_TREE_LUMA : tree_type_curr;
+
+        // remember the non-quadtree split mode of 32-aligned blocks for the first two MTT depths
+        if (split != SPLIT_QT) {
+            if (!(x0 & 31) && !(y0 & 31) && mtt_depth <= 1)
+                TAB_MSM(fc, mtt_depth, x0, y0) = split;
+        }
+        // dispatch on the split mode; the table is indexed by (split - 1)
+        ret = coding_tree[split - 1](lc, x0, y0, cb_width, cb_height, qg_on_y, qg_on_c,
+            cb_sub_div, cqt_depth, mtt_depth, depth_offset, tree_type, mode_type);
+        if (ret < 0)
+            return ret;
+        // this split switched the subtree to intra-only: parse the chroma of the
+        // same area as a separate chroma tree
+        if (mode_type_curr == MODE_TYPE_ALL && mode_type == MODE_TYPE_INTRA) {
+            ret = hls_coding_tree(lc, x0, y0, cb_width, cb_height, 0, qg_on_c, cb_sub_div,
+                cqt_depth, mtt_depth, 0, 0, split, DUAL_TREE_CHROMA, mode_type);
+            if (ret < 0)
+                return ret;
+        }
+    } else {
+        // leaf: no further split, parse the coding unit
+        ret = hls_coding_unit(lc, x0, y0, cb_width, cb_height, cqt_depth, tree_type_curr, mode_type_curr);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Dual-tree entry point: blocks larger than 64 are split into four quadrants
+ * without reading any split syntax; once the block size is 64 or less, a luma
+ * coding tree and then a chroma coding tree are parsed for the same area.
+ *
+ * @param cb_size   width and height of the square block
+ * @param cqt_depth quadtree depth of the block
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int dual_tree_implicit_qt_split(VVCLocalContext *lc,
+    const int x0, const int y0, const int cb_size, const int cqt_depth)
+{
+    const VVCSH *sh     = &lc->sc->sh;
+    const VVCPPS *pps   = lc->fc->ps.pps;
+    const int cb_subdiv = 2 * cqt_depth;
+    int ret;
+
+    if (cb_size > 64) {
+        #define DUAL_TREE(x, y) do {                                                \
+            ret = dual_tree_implicit_qt_split(lc, x, y, cb_size / 2, cqt_depth + 1); \
+            if (ret < 0)                                                            \
+                return ret;                                                         \
+        } while (0)
+
+        const int x1 = x0 + (cb_size / 2);
+        const int y1 = y0 + (cb_size / 2);
+        // same quantization group resets as in hls_coding_tree()
+        if (pps->cu_qp_delta_enabled_flag && cb_subdiv <= sh->cu_qp_delta_subdiv) {
+            lc->parse.is_cu_qp_delta_coded = 0;
+            lc->parse.cu_qg_top_left_x = x0;
+            lc->parse.cu_qg_top_left_y = y0;
+        }
+        if (sh->cu_chroma_qp_offset_enabled_flag && cb_subdiv <= sh->cu_chroma_qp_offset_subdiv) {
+            lc->parse.is_cu_chroma_qp_offset_coded = 0;
+            memset(lc->parse.chroma_qp_offset, 0, sizeof(lc->parse.chroma_qp_offset));
+        }
+        // quadrants whose origin is at or beyond pps->width / pps->height are skipped
+        DUAL_TREE(x0, y0);
+        if (x1 < pps->width)
+            DUAL_TREE(x1, y0);
+        if (y1 < pps->height)
+            DUAL_TREE(x0, y1);
+        if (x1 < pps->width && y1 < pps->height)
+            DUAL_TREE(x1, y1);
+    #undef DUAL_TREE
+    } else {
+        // the luma tree may start luma quantization groups only, the chroma tree chroma ones only
+        #define CODING_TREE(tree_type) do {                                             \
+            const int qg_on_y = tree_type == DUAL_TREE_LUMA;                            \
+            ret = hls_coding_tree(lc, x0, y0, cb_size, cb_size, qg_on_y, !qg_on_y,           \
+                 cb_subdiv, cqt_depth, 0, 0, 0, SPLIT_NONE, tree_type, MODE_TYPE_ALL);  \
+            if (ret < 0)                                                                \
+                return ret;                                                             \
+        } while (0)
+        CODING_TREE(DUAL_TREE_LUMA);
+        CODING_TREE(DUAL_TREE_CHROMA);
+        #undef CODING_TREE
+    }
+    return 0;
+}
+
+// Set one SAO parameter of the current CTB. Without a merge flag the parameter
+// takes `value` (which is only evaluated in that case); with merge-left or
+// merge-up it is copied from the left or upper CTB.
+// Expects sao, sao_merge_left_flag, sao_merge_up_flag, fc, rx and ry in the
+// caller's scope. The final else branch cannot be reached: if the first test
+// fails, at least one merge flag is set.
+#define SET_SAO(elem, value)                            \
+do {                                                    \
+    if (!sao_merge_up_flag && !sao_merge_left_flag)     \
+        sao->elem = value;                              \
+    else if (sao_merge_left_flag)                       \
+        sao->elem = CTB(fc->tab.sao, rx-1, ry).elem;         \
+    else if (sao_merge_up_flag)                         \
+        sao->elem = CTB(fc->tab.sao, rx, ry-1).elem;         \
+    else                                                \
+        sao->elem = 0;                                  \
+} while (0)
+
+/**
+ * Parse the SAO parameters of the CTB at (rx, ry) into fc->tab.sao and derive
+ * the signed offset values.
+ *
+ * @param lc local context
+ * @param rx CTB column
+ * @param ry CTB row
+ */
+static void hls_sao(VVCLocalContext *lc, const int rx, const int ry)
+{
+    VVCFrameContext *fc     = lc->fc;
+    const VVCSH *sh         = &lc->sc->sh;
+    int sao_merge_left_flag = 0;
+    int sao_merge_up_flag   = 0;
+    SAOParams *sao          = &CTB(fc->tab.sao, rx, ry);
+    int c_idx, i;
+
+    // merge flags are read only when lc reports the left / upper CTB as usable;
+    // merge-up is not considered once merge-left is set
+    if (sh->sao_used_flag[0] || sh->sao_used_flag[1]) {
+        if (rx > 0) {
+            if (lc->ctb_left_flag)
+                sao_merge_left_flag = ff_vvc_sao_merge_flag_decode(lc);
+        }
+        if (ry > 0 && !sao_merge_left_flag) {
+            if (lc->ctb_up_flag)
+                sao_merge_up_flag = ff_vvc_sao_merge_flag_decode(lc);
+        }
+    }
+
+    for (c_idx = 0; c_idx < (fc->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
+        if (!sh->sao_used_flag[c_idx]) {
+            sao->type_idx[c_idx] = SAO_NOT_APPLIED;
+            continue;
+        }
+
+        // the second chroma component reuses the type and edge class of the first
+        if (c_idx == 2) {
+            sao->type_idx[2] = sao->type_idx[1];
+            sao->eo_class[2] = sao->eo_class[1];
+        } else {
+            SET_SAO(type_idx[c_idx], ff_vvc_sao_type_idx_decode(lc));
+        }
+
+        if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
+            continue;
+
+        for (i = 0; i < 4; i++)
+            SET_SAO(offset_abs[c_idx][i], ff_vvc_sao_offset_abs_decode(lc));
+
+        if (sao->type_idx[c_idx] == SAO_BAND) {
+            // band offset: a sign exists only for non-zero magnitudes
+            for (i = 0; i < 4; i++) {
+                if (sao->offset_abs[c_idx][i]) {
+                    SET_SAO(offset_sign[c_idx][i],
+                            ff_vvc_sao_offset_sign_decode(lc));
+                } else {
+                    sao->offset_sign[c_idx][i] = 0;
+                }
+            }
+            SET_SAO(band_position[c_idx], ff_vvc_sao_band_position_decode(lc));
+        } else if (c_idx != 2) {
+            SET_SAO(eo_class[c_idx], ff_vvc_sao_eo_class_decode(lc));
+        }
+
+        // Inferred parameters
+        sao->offset_val[c_idx][0] = 0;
+        for (i = 0; i < 4; i++) {
+            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
+            if (sao->type_idx[c_idx] == SAO_EDGE) {
+                // edge offset: the last two offsets are negative by definition
+                if (i > 1)
+                    sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
+            } else if (sao->offset_sign[c_idx][i]) {
+                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
+            }
+            // scale up by the number of bits the bit depth exceeds 10
+            sao->offset_val[c_idx][i + 1] *= 1 << (fc->ps.sps->bit_depth - FFMIN(10, fc->ps.sps->bit_depth));
+        }
+    }
+}
+
+/**
+ * Parse the per-CTB ALF and cross-component ALF parameters of the CTB at
+ * (rx, ry) into fc->tab.alf.
+ *
+ * @param lc local context
+ * @param rx CTB column
+ * @param ry CTB row
+ */
+static void alf_params(VVCLocalContext *lc, const int rx, const int ry)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSH *sh             = &lc->sc->sh;
+    ALFParams *alf              = &CTB(fc->tab.alf, rx, ry);
+
+    alf->ctb_flag[LUMA] = alf->ctb_flag[CB] = alf->ctb_flag[CR] = 0;
+    // the chroma flags are only parsed when luma ALF is enabled for the slice
+    if (sh->alf.enabled_flag[LUMA]) {
+        alf->ctb_flag[LUMA] = ff_vvc_alf_ctb_flag(lc, rx, ry, LUMA);
+        if (alf->ctb_flag[LUMA]) {
+            int alf_use_aps_flag = 0;
+            if (sh->alf.num_aps_ids_luma > 0) {
+                alf_use_aps_flag = ff_vvc_alf_use_aps_flag(lc);
+            }
+            // filter set indices from 16 upwards select an APS filter set,
+            // lower values come from the fixed filter index
+            if (alf_use_aps_flag) {
+                alf->ctb_filt_set_idx_y = 16;
+                if (sh->alf.num_aps_ids_luma > 1)
+                    alf->ctb_filt_set_idx_y += ff_vvc_alf_luma_prev_filter_idx(lc);
+            } else {
+                alf->ctb_filt_set_idx_y = ff_vvc_alf_luma_fixed_filter_idx(lc);
+            }
+        }
+        for (int c_idx = CB; c_idx <= CR; c_idx++) {
+            if (sh->alf.enabled_flag[c_idx]) {
+                // NOTE(review): assumes alf_list[aps_id_chroma] is non-NULL here - confirm it is validated earlier
+                const VVCALF *aps = (VVCALF*)fc->ps.alf_list[sh->alf.aps_id_chroma]->data;
+                alf->ctb_flag[c_idx] = ff_vvc_alf_ctb_flag(lc, rx, ry, c_idx);
+                alf->alf_ctb_filter_alt_idx[c_idx - 1] = 0;
+                // an alternative filter index is only coded when the APS carries more than one chroma filter
+                if (alf->ctb_flag[c_idx] && aps->num_chroma_filters > 1)
+                    alf->alf_ctb_filter_alt_idx[c_idx - 1] = ff_vvc_alf_ctb_filter_alt_idx(lc, c_idx, aps->num_chroma_filters);
+            }
+        }
+    }
+    // cross-component ALF, one entry per chroma component
+    for (int i = 0; i < 2; i++) {
+        alf->ctb_cc_idc[i] = 0;
+        if (sh->alf.cc_enabled_flag[i]) {
+            // NOTE(review): same assumption as above for alf_list[cc_aps_id[i]]
+            const VVCALF *aps = (VVCALF*)fc->ps.alf_list[sh->alf.cc_aps_id[i]]->data;
+            alf->ctb_cc_idc[i] = ff_vvc_alf_ctb_cc_idc(lc, rx, ry, i, aps->cc_filters_signalled[i]);
+        }
+    }
+}
+
+// Store a copy of the slice header's deblocking parameters for the CTB at (rx, ry).
+static void deblock_params(VVCLocalContext *lc, const int rx, const int ry)
+{
+    VVCFrameContext *fc = lc->fc;
+
+    CTB(fc->tab.deblock, rx, ry) = lc->sc->sh.deblock;
+}
+
+/**
+ * Parse one CTU: in-loop filter parameters, the coding tree(s) and the
+ * terminating bit that follows the CTU where one is expected.
+ *
+ * @param x0, y0   position of the CTU
+ * @param ctu_idx  index of the CTU within the current slice
+ * @param rx, ry   CTB column and row
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int hls_coding_tree_unit(VVCLocalContext *lc,
+    const int x0, const int y0, const int ctu_idx, const int rx, const int ry)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPPS *pps           = fc->ps.pps;
+    const VVCSH *sh             = &lc->sc->sh;
+    const unsigned int ctb_size = sps->ctb_size_y;
+    const int ctb_x             = x0 >> sps->ctb_log2_size_y;
+    const int ctb_y             = y0 >> sps->ctb_log2_size_y;
+    int ret;
+
+    memset(lc->parse.chroma_qp_offset, 0, sizeof(lc->parse.chroma_qp_offset));
+
+    hls_sao(lc, ctb_x, ctb_y);
+    alf_params(lc, ctb_x, ctb_y);
+    deblock_params(lc, ctb_x, ctb_y);
+
+    if (IS_I(sh) && sps->qtbtt_dual_tree_intra_flag) {
+        ret = dual_tree_implicit_qt_split(lc, x0, y0, ctb_size, 0);
+    } else {
+        ret = hls_coding_tree(lc, x0, y0, ctb_size, ctb_size,
+            1, 1, 0, 0, 0, 0, 0, SPLIT_NONE, SINGLE_TREE, MODE_TYPE_ALL);
+    }
+    if (ret < 0)
+        return ret;
+
+    // a terminating bit can only follow a CTU in the column just before the next column boundary
+    if (rx != pps->ctb_to_col_bd[rx + 1] - 1)
+        return 0;
+
+    if (ctu_idx == sh->num_ctus_in_curr_slice - 1) {
+        // last CTU of the slice
+        if (!ff_vvc_end_of_slice_flag_decode(lc))
+            return AVERROR_INVALIDDATA;
+    } else if (ry == pps->ctb_to_row_bd[ry + 1] - 1) {
+        // row just before the next row boundary
+        if (!ff_vvc_end_of_tile_one_bit(lc))
+            return AVERROR_INVALIDDATA;
+    } else if (sps->entropy_coding_sync_enabled_flag) {
+        if (!ff_vvc_end_of_subset_one_bit(lc))
+            return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+// Nonzero when the CU is neither intra nor palette coded and is not part of a chroma-only tree.
+static int has_inter_luma(const CodingUnit *cu)
+{
+    if (cu->tree_type == DUAL_TREE_CHROMA)
+        return 0;
+    return !(cu->pred_mode == MODE_INTRA || cu->pred_mode == MODE_PLT);
+}
+
+// Row just below a block of the given height after displacing y0 by the
+// vertical motion vector component (mv->y >> 4), clamped to be non-negative.
+static int pred_get_y(const int y0, const Mv *mv, const int height)
+{
+    const int bottom = y0 + (mv->y >> 4) + height;
+
+    return bottom < 0 ? 0 : bottom;
+}
+
+/**
+ * Raise max_y[lx][ref_idx] to the largest row, as computed by pred_get_y(),
+ * that the motion vectors of this CU reach in each reference it uses.
+ *
+ * @param cu    coding unit to inspect
+ * @param max_y per list / per reference index maxima, updated in place
+ * @param fc    frame context, used to look up the stored motion fields
+ */
+static void cu_get_max_y(const CodingUnit *cu, int max_y[2][VVC_MAX_REF_ENTRIES], const VVCFrameContext *fc)
+{
+    const PredictionUnit *pu    = &cu->pu;
+
+    if (pu->merge_gpm_flag) {
+        // GPM: take the motion vectors straight from the PU, each applied to the whole CU
+        for (int i = 0; i < FF_ARRAY_ELEMS(pu->gpm_mv); i++) {
+            const MvField *mvf  = pu->gpm_mv + i;
+            // NOTE(review): assumes every GPM motion vector is uni-predicted (PF_L0 or PF_L1) - confirm
+            const int lx        = mvf->pred_flag - PF_L0;
+            const int idx       = mvf->ref_idx[lx];
+            const int y         = pred_get_y(cu->y0, mvf->mv + lx, cu->cb_height);
+
+            max_y[lx][idx]      = FFMAX(max_y[lx][idx], y);
+        }
+    } else {
+        const MotionInfo *mi    = &pu->mi;
+        // two extra rows are added when DMVR is on for a non-affine PU
+        const int max_dmvr_off  = (!pu->inter_affine_flag && pu->dmvr_flag) ? 2 : 0;
+        const int sbw           = cu->cb_width / mi->num_sb_x;
+        const int sbh           = cu->cb_height / mi->num_sb_y;
+        // visit every subblock and read the motion field stored at its top-left corner
+        for (int sby = 0; sby < mi->num_sb_y; sby++) {
+            for (int sbx = 0; sbx < mi->num_sb_x; sbx++) {
+                const int x0        = cu->x0 + sbx * sbw;
+                const int y0        = cu->y0 + sby * sbh;
+                const MvField *mvf  = ff_vvc_get_mvf(fc, x0, y0);
+                for (int lx = 0; lx < 2; lx++) {
+                    // pred_flag is tested as a bit mask: bit lx set means list lx is used
+                    const PredFlag mask = 1 << lx;
+                    if (mvf->pred_flag & mask) {
+                        const int idx   = mvf->ref_idx[lx];
+                        const int y     = pred_get_y(y0, mvf->mv + lx, sbh);
+
+                        max_y[lx][idx]  = FFMAX(max_y[lx][idx], y + max_dmvr_off);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// For the CTU at raster index rs, collect per reference picture the largest
+// row its inter-coded CUs reach (see cu_get_max_y()). Nothing is done for I slices.
+static void pred_get_max_y(VVCLocalContext *lc, const int rs)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSH *sh             = &lc->sc->sh;
+    CTU *ctu                    = fc->tab.ctus + rs;
+
+    if (IS_I(sh))
+        return;
+
+    // filling with byte 0xff sets every entry in use to -1
+    for (int lx = 0; lx < 2; lx++)
+        memset(ctu->max_y[lx], -1, sizeof(ctu->max_y[0][0]) * sh->nb_refs[lx]);
+
+    for (const CodingUnit *cu = ctu->cus; cu; cu = cu->next) {
+        if (has_inter_luma(cu))
+            cu_get_max_y(cu, ctu->max_y, fc);
+    }
+
+    ctu->max_y_idx[0] = 0;
+    ctu->max_y_idx[1] = 0;
+}
+
+// Parse the CTU with slice-local index ctu_idx, raster index rs and CTB
+// coordinates (rx, ry). Returns 0 on success, a negative AVERROR code otherwise.
+int ff_vvc_coding_tree_unit(VVCLocalContext *lc,
+    const int ctu_idx, const int rs, const int rx, const int ry)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPPS *pps           = fc->ps.pps;
+    const int log2_ctb          = sps->ctb_log2_size_y;
+    const int x_ctb             = rx << log2_ctb;
+    const int y_ctb             = ry << log2_ctb;
+    // CTB width times CTB height
+    const int ctb_samples       = 1 << log2_ctb << log2_ctb;
+    EntryPoint *ep              = lc->ep;
+    int ret;
+
+    // first CTB after a column boundary: restart the HMVP table
+    if (rx == pps->ctb_to_col_bd[rx]) {
+        //fix me for ibc
+        ep->num_hmvp    = 0;
+        ep->is_first_qg = !ctu_idx || ry == pps->ctb_to_row_bd[ry];
+    }
+
+    lc->coeffs = fc->tab.coeffs + rs * ctb_samples * VVC_MAX_SAMPLE_ARRAYS;
+    lc->cu     = NULL;
+
+    ff_vvc_cabac_init(lc, ctu_idx, rx, ry);
+    fc->tab.slice_idx[rs] = lc->sc->slice_idx;
+    ff_vvc_decode_neighbour(lc, x_ctb, y_ctb, rx, ry, rs);
+
+    ret = hls_coding_tree_unit(lc, x_ctb, y_ctb, ctu_idx, rx, ry);
+    if (ret < 0)
+        return ret;
+
+    pred_get_max_y(lc, rs);
+
+    return 0;
+}
+
 void ff_vvc_decode_neighbour(VVCLocalContext *lc, const int x_ctb, const int y_ctb,
     const int rx, const int ry, const int rs)
 {
@@ -70,13 +2421,30 @@ void ff_vvc_set_neighbour_available(VVCLocalContext *lc,
 
 void ff_vvc_ctu_free_cus(CTU *ctu)
 {
-    while (ctu->cus) {
-        CodingUnit *cu      = ctu->cus;
-        AVBufferRef *buf    = cu->buf;
+    // walk the CU list; for every CU release its transform units first, then the CU
+    CodingUnit *cu  = ctu->cus;
+    while (cu) {
+        AVBufferRef *cu_buf = cu->buf;
+        TransformUnit *tu   = cu->tus.head;
+
+        while (tu) {
+            AVBufferRef *buf = tu->buf;
+            // read the successor before the buffer reference is dropped
+            // (presumably the TU itself lives in that buffer - confirm)
+            tu  = tu->next;
+            av_buffer_unref(&buf);
+        }
+        cu->tus.head = cu->tus.tail = NULL;
 
-        ctu->cus = ctu->cus->next;
-        av_buffer_unref(&buf);
+        // same ordering for the CU: advance first, then unref
+        cu = cu->next;
+        av_buffer_unref(&cu_buf);
     }
+    ctu->cus = NULL;
+}
+
+// Look up the luma QP stored for the minimum coding block that contains (xc, yc).
+int ff_vvc_get_qPy(const VVCFrameContext *fc, const int xc, const int yc)
+{
+    const int log2_min_cb   = fc->ps.sps->min_cb_log2_size_y;
+    const int col           = xc >> log2_min_cb;
+    const int row           = yc >> log2_min_cb;
+
+    return fc->tab.qp[LUMA][row * fc->ps.pps->min_cb_width + col];
 }
 
 void ff_vvc_ep_init_stat_coeff(EntryPoint *ep,
diff --git a/libavcodec/vvc/vvc_ctu.h b/libavcodec/vvc/vvc_ctu.h
index df56848e13..d9cc1cf78f 100644
--- a/libavcodec/vvc/vvc_ctu.h
+++ b/libavcodec/vvc/vvc_ctu.h
@@ -400,6 +400,17 @@ struct ALFParams {
     uint8_t applied[3];
 };
 
+/**
+ * Parse a CTU.
+ * @param lc local context for the CTU
+ * @param ctu_idx index of the CTU within the current slice
+ * @param rs raster scan index of the CTU
+ * @param rx CTB column of the CTU
+ * @param ry CTB row of the CTU
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vvc_coding_tree_unit(VVCLocalContext *lc, int ctu_idx, int rs, int rx, int ry);
+
 //utils
 void ff_vvc_set_neighbour_available(VVCLocalContext *lc, int x0, int y0, int w, int h);
 void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, int ry, int rs);
-- 
2.25.1



More information about the ffmpeg-devel mailing list