[FFmpeg-cvslog] hevcdsp: split the qpel functions by width instead of by the subpixel fraction

Anton Khirnov git at videolan.org
Tue Dec 15 10:47:12 CET 2015


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Fri Jul 24 08:24:21 2015 +0200| [1f821750f0b8d0c87cbf88a28ad699b92db5ec88] | committer: Anton Khirnov

hevcdsp: split the qpel functions by width instead of by the subpixel fraction

This should allow for more efficient SIMD.

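Keep the existing per-fraction C implementations as they are now and make the
new per-width C functions thin wrappers that dispatch to them based on mx/my,
to allow the compiler to inline the interpolation coefficients.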

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1f821750f0b8d0c87cbf88a28ad699b92db5ec88
---

 libavcodec/hevc.c             |   19 ++++++----
 libavcodec/hevcdsp.c          |   30 +++++++--------
 libavcodec/hevcdsp.h          |    6 +--
 libavcodec/hevcdsp_template.c |   82 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 108 insertions(+), 29 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index e1b1be3..f2303ac 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -1479,7 +1479,7 @@ static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
  */
 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
-                    int block_w, int block_h)
+                    int block_w, int block_h, int pred_idx)
 {
     HEVCLocalContext *lc = &s->HEVClc;
     uint8_t *src         = ref->data[0];
@@ -1513,8 +1513,8 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
         src = lc->edge_emu_buffer + buf_offset;
         srcstride = edge_emu_stride;
     }
-    s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
-                                     block_h, lc->mc_buffer);
+    s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
+                                                   block_h, mx, my, lc->mc_buffer);
 }
 
 /**
@@ -1651,6 +1651,11 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                                 int nPbW, int nPbH,
                                 int log2_cb_size, int partIdx)
 {
+    static const int pred_indices[] = {
+        [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
+    };
+    const int pred_idx = pred_indices[nPbW];
+
 #define POS(c_idx, x, y)                                                              \
     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
@@ -1719,7 +1724,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
+                &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
@@ -1755,7 +1760,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
+                &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
@@ -1792,9 +1797,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
+                &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
         luma_mc(s, tmp2, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
+                &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 216101a..86d9e85 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -116,6 +116,12 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
 #undef FUNC
 #define FUNC(a, depth) a ## _ ## depth
 
+#define QPEL_FUNC(i, width, depth)                                                  \
+    hevcdsp->put_hevc_qpel[0][0][i] = FUNC(put_hevc_qpel_pixels_ ## width, depth);  \
+    hevcdsp->put_hevc_qpel[0][1][i] = FUNC(put_hevc_qpel_h_      ## width, depth);  \
+    hevcdsp->put_hevc_qpel[1][0][i] = FUNC(put_hevc_qpel_v_      ## width, depth);  \
+    hevcdsp->put_hevc_qpel[1][1][i] = FUNC(put_hevc_qpel_hv_     ## width, depth);  \
+
 #define HEVC_DSP(depth)                                                     \
     hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
     hevcdsp->transquant_bypass[0]   = FUNC(transquant_bypass4x4, depth);    \
@@ -139,22 +145,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth);           \
     hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth);           \
                                                                             \
-    hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth);       \
-    hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth);           \
-    hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth);           \
-    hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth);           \
-    hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth);           \
-    hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth);         \
-    hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth);         \
-    hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth);         \
-    hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth);           \
-    hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth);         \
-    hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth);         \
-    hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth);         \
-    hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth);           \
-    hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth);         \
-    hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth);         \
-    hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth);         \
+    QPEL_FUNC(0, 4,  depth);                                                \
+    QPEL_FUNC(1, 8,  depth);                                                \
+    QPEL_FUNC(2, 12, depth);                                                \
+    QPEL_FUNC(3, 16, depth);                                                \
+    QPEL_FUNC(4, 24, depth);                                                \
+    QPEL_FUNC(5, 32, depth);                                                \
+    QPEL_FUNC(6, 48, depth);                                                \
+    QPEL_FUNC(7, 64, depth);                                                \
                                                                             \
     hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth);       \
     hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth);            \
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index 7278464..c250385 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -58,9 +58,9 @@ typedef struct HEVCDSPContext {
                                int height, int c_idx, uint8_t vert_edge,
                                uint8_t horiz_edge, uint8_t diag_edge);
 
-    void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
-                                ptrdiff_t srcstride, int width, int height,
-                                int16_t *mcbuffer);
+    void (*put_hevc_qpel[2][2][8])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
+                                   ptrdiff_t srcstride, int height,
+                                   int mx, int my, int16_t *mcbuffer);
     void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
                                 ptrdiff_t srcstride, int width, int height,
                                 int mx, int my, int16_t *mcbuffer);
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 390f683..84503ec 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -775,9 +775,11 @@ static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
 #undef TR_16
 #undef TR_32
 
-static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
-                                       uint8_t *_src, ptrdiff_t _srcstride,
-                                       int width, int height, int16_t* mcbuffer)
+static av_always_inline void
+FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
+                           uint8_t *_src, ptrdiff_t _srcstride,
+                           int width, int height, int mx, int my,
+                           int16_t* mcbuffer)
 {
     int x, y;
     pixel *src          = (pixel *)_src;
@@ -906,6 +908,80 @@ PUT_HEVC_QPEL_HV(3, 1)
 PUT_HEVC_QPEL_HV(3, 2)
 PUT_HEVC_QPEL_HV(3, 3)
 
+#define QPEL(W)                                                                             \
+static void FUNC(put_hevc_qpel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride,             \
+                                             uint8_t *src, ptrdiff_t srcstride,             \
+                                             int height, int mx, int my,                    \
+                                             int16_t *mcbuffer)                             \
+{                                                                                           \
+    FUNC(put_hevc_qpel_pixels)(dst, dststride, src, srcstride, W, height,                   \
+                               mx, my, mcbuffer);                                           \
+}                                                                                           \
+                                                                                            \
+static void FUNC(put_hevc_qpel_h_ ## W)(int16_t *dst, ptrdiff_t dststride,                  \
+                                        uint8_t *src, ptrdiff_t srcstride,                  \
+                                        int height, int mx, int my,                         \
+                                        int16_t *mcbuffer)                                  \
+{                                                                                           \
+    if (mx == 1)                                                                            \
+        FUNC(put_hevc_qpel_h1)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+    else if (mx == 2)                                                                       \
+        FUNC(put_hevc_qpel_h2)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+    else                                                                                    \
+        FUNC(put_hevc_qpel_h3)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+}                                                                                           \
+                                                                                            \
+static void FUNC(put_hevc_qpel_v_ ## W)(int16_t *dst, ptrdiff_t dststride,                  \
+                                             uint8_t *src, ptrdiff_t srcstride,             \
+                                             int height, int mx, int my,                    \
+                                             int16_t *mcbuffer)                             \
+{                                                                                           \
+    if (my == 1)                                                                            \
+        FUNC(put_hevc_qpel_v1)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+    else if (my == 2)                                                                       \
+        FUNC(put_hevc_qpel_v2)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+    else                                                                                    \
+        FUNC(put_hevc_qpel_v3)(dst, dststride, src, srcstride, W, height, mcbuffer);        \
+}                                                                                           \
+                                                                                            \
+static void FUNC(put_hevc_qpel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride,                 \
+                                             uint8_t *src, ptrdiff_t srcstride,             \
+                                             int height, int mx, int my,                    \
+                                             int16_t *mcbuffer)                             \
+{                                                                                           \
+    if (my == 1) {                                                                          \
+        if (mx == 1)                                                                        \
+            FUNC(put_hevc_qpel_h1v1)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else if (mx == 2)                                                                   \
+            FUNC(put_hevc_qpel_h2v1)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else                                                                                \
+            FUNC(put_hevc_qpel_h3v1)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+    } else if (my == 2) {                                                                   \
+        if (mx == 1)                                                                        \
+            FUNC(put_hevc_qpel_h1v2)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else if (mx == 2)                                                                   \
+            FUNC(put_hevc_qpel_h2v2)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else                                                                                \
+            FUNC(put_hevc_qpel_h3v2)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+    } else {                                                                                \
+        if (mx == 1)                                                                        \
+            FUNC(put_hevc_qpel_h1v3)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else if (mx == 2)                                                                   \
+            FUNC(put_hevc_qpel_h2v3)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+        else                                                                                \
+            FUNC(put_hevc_qpel_h3v3)(dst, dststride, src, srcstride, W, height, mcbuffer);  \
+    }                                                                                       \
+}
+
+QPEL(64)
+QPEL(48)
+QPEL(32)
+QPEL(24)
+QPEL(16)
+QPEL(12)
+QPEL(8)
+QPEL(4)
+
 static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
                                        uint8_t *_src, ptrdiff_t _srcstride,
                                        int width, int height, int mx, int my,



More information about the ffmpeg-cvslog mailing list