[FFmpeg-devel] [PATCH 1/2] x86: hevc_mc: use proxy functions

Christophe Gisquet christophe.gisquet at gmail.com
Thu Oct 2 20:52:44 CEST 2014


Most functions were actually instantiated and unrolled, causing an increase
in object size.

On Win64, before: stripped object size 185404
36248 decicycles in qpel, 522688 runs, 1600 skips
39808 decicycles in qpel bi, 522920 runs, 1368 skips
6997 decicycles in epel, 1037753 runs, 10823 skips
8042 decicycles in epel bi, 1037563 runs, 11013 skips
14512 decicycles in qpel uni, 516830 runs, 7458 skips
3899 decicycles in epel uni, 1035334 runs, 13242 skips

After: stripped object size 155576
36335 decicycles in qpel, 522784 runs, 1504 skips
39727 decicycles in qpel bi, 522873 runs, 1415 skips
7040 decicycles in epel, 1038058 runs, 10518 skips
8068 decicycles in epel bi, 1037684 runs, 10892 skips
14608 decicycles in qpel uni, 516923 runs, 7365 skips
3920 decicycles in epel uni, 1035699 runs, 12877 skips
---
 libavcodec/x86/hevcdsp_init.c | 456 +++++++++++++++++++++++++++++++++---------
 1 file changed, 361 insertions(+), 95 deletions(-)

diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index eaa97e1..4c536ac 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -71,6 +71,43 @@ IDCT_FUNCS(32x32, sse2);
 IDCT_FUNCS(16x16, avx2);
 IDCT_FUNCS(32x32, avx2);
 
+
+#define mc_rep_proxy(name, bitd, step, opt) \
+static void proxy_##name##step##_##bitd##_##opt(int16_t *dst,                                                   \
+                                                uint8_t *src, ptrdiff_t srcstride, int height,                  \
+                                                intptr_t mx, intptr_t my, int width)                            \
+{                                                                                                               \
+    int i, step2 = step * ((bitd + 7) / 8);                                                                     \
+    for (i = 0; i < width; i += step, src += step2, dst += step)                                                \
+        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, srcstride, height, mx, my, width);             \
+}
+#define mc_rep_uni_proxy(name, bitd, step, opt) \
+static void proxy_uni_##name##step##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,                          \
+                                                    uint8_t *src, ptrdiff_t srcstride, int height,              \
+                                                    intptr_t mx, intptr_t my, int width)                        \
+{                                                                                                               \
+    int i, step2 = step * ((bitd + 7) / 8);                                                                     \
+    for (i = 0; i < width; i += step, src += step2, dst += step2)                                               \
+        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, srcstride,                      \
+                                                           height, mx, my, width);                              \
+}
+#define mc_rep_bi_proxy(name, bitd, step, opt) \
+static void proxy_bi_##name##step##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,             \
+                                                   ptrdiff_t srcstride, int16_t* src2,                          \
+                                                   int height, intptr_t mx, intptr_t my, int width)             \
+{                                                                                                               \
+    int i, step2 = step * ((bitd + 7) / 8);                                                                     \
+    for (i = 0; i < width; i += step, src += step2, dst += step2, src2 += step)                                 \
+        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, srcstride, src2,                 \
+                                                          height, mx, my, width);                               \
+}
+
+// Create the actual proxy functions
+#define mc_rep_proxies(name, bitd, step, opt)       \
+    mc_rep_proxy(name, bitd, step, opt);            \
+    mc_rep_uni_proxy(name, bitd, step, opt);        \
+    mc_rep_bi_proxy(name, bitd, step, opt)
+
 #define mc_rep_func(name, bitd, step, W, opt) \
 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst,                                                 \
                                                 uint8_t *_src, ptrdiff_t _srcstride, int height,                \
@@ -165,123 +202,352 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts
 
 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
 
-mc_rep_funcs(pel_pixels, 8, 16, 64, sse4);
-mc_rep_funcs(pel_pixels, 8, 16, 48, sse4);
-mc_rep_funcs(pel_pixels, 8, 16, 32, sse4);
+mc_rep_proxies(pel_pixels, 8, 16, sse4);
+#define ff_hevc_put_hevc_pel_pixels64_8_sse4      proxy_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels64_8_sse4  proxy_uni_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels64_8_sse4   proxy_bi_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_pel_pixels48_8_sse4      proxy_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels48_8_sse4  proxy_uni_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels48_8_sse4   proxy_bi_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_pel_pixels32_8_sse4      proxy_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels32_8_sse4  proxy_uni_pel_pixels16_8_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels32_8_sse4   proxy_bi_pel_pixels16_8_sse4
 mc_rep_funcs(pel_pixels, 8,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 64, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 48, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 32, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 16, sse4);
+
+mc_rep_proxies(pel_pixels, 10, 8, sse4);
+#define ff_hevc_put_hevc_pel_pixels64_10_sse4     proxy_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels64_10_sse4 proxy_uni_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels64_10_sse4  proxy_bi_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_pel_pixels48_10_sse4     proxy_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels48_10_sse4 proxy_uni_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels48_10_sse4  proxy_bi_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_pel_pixels32_10_sse4     proxy_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels32_10_sse4 proxy_uni_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels32_10_sse4  proxy_bi_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_pel_pixels24_10_sse4     proxy_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels24_10_sse4 proxy_uni_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels24_10_sse4  proxy_bi_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_pel_pixels16_10_sse4     proxy_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels16_10_sse4 proxy_uni_pel_pixels8_10_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels16_10_sse4  proxy_bi_pel_pixels8_10_sse4
 mc_rep_funcs(pel_pixels,10,  4, 12, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 64, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 48, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 32, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 16, sse4);
+
+mc_rep_proxies(pel_pixels,12,  8, sse4);
+#define ff_hevc_put_hevc_pel_pixels64_12_sse4     proxy_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels64_12_sse4 proxy_uni_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels64_12_sse4  proxy_bi_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_pel_pixels48_12_sse4     proxy_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels48_12_sse4 proxy_uni_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels48_12_sse4  proxy_bi_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_pel_pixels32_12_sse4     proxy_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels32_12_sse4 proxy_uni_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels32_12_sse4  proxy_bi_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_pel_pixels24_12_sse4     proxy_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels24_12_sse4 proxy_uni_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels24_12_sse4  proxy_bi_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_pel_pixels16_12_sse4     proxy_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_uni_pel_pixels16_12_sse4 proxy_uni_pel_pixels8_12_sse4
+#define ff_hevc_put_hevc_bi_pel_pixels16_12_sse4  proxy_bi_pel_pixels8_12_sse4
 mc_rep_funcs(pel_pixels,12,  4, 12, sse4);
 
-mc_rep_funcs(epel_h, 8, 16, 64, sse4);
-mc_rep_funcs(epel_h, 8, 16, 48, sse4);
-mc_rep_funcs(epel_h, 8, 16, 32, sse4);
+mc_rep_proxies(epel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_epel_h64_8_sse4      proxy_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_h64_8_sse4  proxy_uni_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_h64_8_sse4   proxy_bi_epel_h16_8_sse4
+#define ff_hevc_put_hevc_epel_h48_8_sse4      proxy_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_h48_8_sse4  proxy_uni_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_h48_8_sse4   proxy_bi_epel_h16_8_sse4
+#define ff_hevc_put_hevc_epel_h32_8_sse4      proxy_epel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_h32_8_sse4  proxy_uni_epel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_h32_8_sse4   proxy_bi_epel_h16_8_sse4
 mc_rep_funcs(epel_h, 8,  8, 24, sse4);
-mc_rep_funcs(epel_h,10,  8, 64, sse4);
-mc_rep_funcs(epel_h,10,  8, 48, sse4);
-mc_rep_funcs(epel_h,10,  8, 32, sse4);
-mc_rep_funcs(epel_h,10,  8, 24, sse4);
-mc_rep_funcs(epel_h,10,  8, 16, sse4);
+
+mc_rep_proxies(epel_h,10,  8, sse4);
+#define ff_hevc_put_hevc_epel_h64_10_sse4     proxy_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_h64_10_sse4 proxy_uni_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_h64_10_sse4  proxy_bi_epel_h8_10_sse4
+#define ff_hevc_put_hevc_epel_h48_10_sse4     proxy_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_h48_10_sse4 proxy_uni_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_h48_10_sse4  proxy_bi_epel_h8_10_sse4
+#define ff_hevc_put_hevc_epel_h32_10_sse4     proxy_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_h32_10_sse4 proxy_uni_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_h32_10_sse4  proxy_bi_epel_h8_10_sse4
+#define ff_hevc_put_hevc_epel_h24_10_sse4     proxy_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_h24_10_sse4 proxy_uni_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_h24_10_sse4  proxy_bi_epel_h8_10_sse4
+#define ff_hevc_put_hevc_epel_h16_10_sse4     proxy_epel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_h16_10_sse4 proxy_uni_epel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_h16_10_sse4  proxy_bi_epel_h8_10_sse4
 mc_rep_funcs(epel_h,10,  4, 12, sse4);
-mc_rep_funcs(epel_h,12,  8, 64, sse4);
-mc_rep_funcs(epel_h,12,  8, 48, sse4);
-mc_rep_funcs(epel_h,12,  8, 32, sse4);
-mc_rep_funcs(epel_h,12,  8, 24, sse4);
-mc_rep_funcs(epel_h,12,  8, 16, sse4);
+
+mc_rep_proxies(epel_h,12,  8, sse4);
+#define ff_hevc_put_hevc_epel_h64_12_sse4     proxy_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_h64_12_sse4 proxy_uni_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_h64_12_sse4  proxy_bi_epel_h8_12_sse4
+#define ff_hevc_put_hevc_epel_h48_12_sse4     proxy_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_h48_12_sse4 proxy_uni_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_h48_12_sse4  proxy_bi_epel_h8_12_sse4
+#define ff_hevc_put_hevc_epel_h32_12_sse4     proxy_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_h32_12_sse4 proxy_uni_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_h32_12_sse4  proxy_bi_epel_h8_12_sse4
+#define ff_hevc_put_hevc_epel_h24_12_sse4     proxy_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_h24_12_sse4 proxy_uni_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_h24_12_sse4  proxy_bi_epel_h8_12_sse4
+#define ff_hevc_put_hevc_epel_h16_12_sse4     proxy_epel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_h16_12_sse4 proxy_uni_epel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_h16_12_sse4  proxy_bi_epel_h8_12_sse4
 mc_rep_funcs(epel_h,12,  4, 12, sse4);
-mc_rep_funcs(epel_v, 8, 16, 64, sse4);
-mc_rep_funcs(epel_v, 8, 16, 48, sse4);
-mc_rep_funcs(epel_v, 8, 16, 32, sse4);
+
+mc_rep_proxies(epel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_epel_v64_8_sse4      proxy_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_v64_8_sse4  proxy_uni_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_v64_8_sse4   proxy_bi_epel_v16_8_sse4
+#define ff_hevc_put_hevc_epel_v48_8_sse4      proxy_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_v48_8_sse4  proxy_uni_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_v48_8_sse4   proxy_bi_epel_v16_8_sse4
+#define ff_hevc_put_hevc_epel_v32_8_sse4      proxy_epel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_epel_v32_8_sse4  proxy_uni_epel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_epel_v32_8_sse4   proxy_bi_epel_v16_8_sse4
 mc_rep_funcs(epel_v, 8,  8, 24, sse4);
-mc_rep_funcs(epel_v,10,  8, 64, sse4);
-mc_rep_funcs(epel_v,10,  8, 48, sse4);
-mc_rep_funcs(epel_v,10,  8, 32, sse4);
-mc_rep_funcs(epel_v,10,  8, 24, sse4);
-mc_rep_funcs(epel_v,10,  8, 16, sse4);
+
+mc_rep_proxies(epel_v,10,  8, sse4);
+#define ff_hevc_put_hevc_epel_v64_10_sse4     proxy_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_v64_10_sse4 proxy_uni_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_v64_10_sse4  proxy_bi_epel_v8_10_sse4
+#define ff_hevc_put_hevc_epel_v48_10_sse4     proxy_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_v48_10_sse4 proxy_uni_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_v48_10_sse4  proxy_bi_epel_v8_10_sse4
+#define ff_hevc_put_hevc_epel_v32_10_sse4     proxy_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_v32_10_sse4 proxy_uni_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_v32_10_sse4  proxy_bi_epel_v8_10_sse4
+#define ff_hevc_put_hevc_epel_v24_10_sse4     proxy_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_v24_10_sse4 proxy_uni_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_v24_10_sse4  proxy_bi_epel_v8_10_sse4
+#define ff_hevc_put_hevc_epel_v16_10_sse4     proxy_epel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_v16_10_sse4 proxy_uni_epel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_v16_10_sse4  proxy_bi_epel_v8_10_sse4
 mc_rep_funcs(epel_v,10,  4, 12, sse4);
-mc_rep_funcs(epel_v,12,  8, 64, sse4);
-mc_rep_funcs(epel_v,12,  8, 48, sse4);
-mc_rep_funcs(epel_v,12,  8, 32, sse4);
-mc_rep_funcs(epel_v,12,  8, 24, sse4);
-mc_rep_funcs(epel_v,12,  8, 16, sse4);
+
+mc_rep_proxies(epel_v,12,  8, sse4);
+#define ff_hevc_put_hevc_epel_v64_12_sse4     proxy_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_v64_12_sse4 proxy_uni_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_v64_12_sse4  proxy_bi_epel_v8_12_sse4
+#define ff_hevc_put_hevc_epel_v48_12_sse4     proxy_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_v48_12_sse4 proxy_uni_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_v48_12_sse4  proxy_bi_epel_v8_12_sse4
+#define ff_hevc_put_hevc_epel_v32_12_sse4     proxy_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_v32_12_sse4 proxy_uni_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_v32_12_sse4  proxy_bi_epel_v8_12_sse4
+#define ff_hevc_put_hevc_epel_v24_12_sse4     proxy_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_v24_12_sse4 proxy_uni_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_v24_12_sse4  proxy_bi_epel_v8_12_sse4
+#define ff_hevc_put_hevc_epel_v16_12_sse4     proxy_epel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_v16_12_sse4 proxy_uni_epel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_v16_12_sse4  proxy_bi_epel_v8_12_sse4
 mc_rep_funcs(epel_v,12,  4, 12, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 64, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 48, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 32, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 24, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 16, sse4);
+
+mc_rep_proxies(epel_hv, 8, 8, sse4);
+#define ff_hevc_put_hevc_epel_hv64_8_sse4      proxy_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_epel_hv64_8_sse4  proxy_uni_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_epel_hv64_8_sse4   proxy_bi_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_epel_hv48_8_sse4      proxy_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_epel_hv48_8_sse4  proxy_uni_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_epel_hv48_8_sse4   proxy_bi_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_epel_hv32_8_sse4      proxy_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_epel_hv32_8_sse4  proxy_uni_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_epel_hv32_8_sse4   proxy_bi_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_epel_hv24_8_sse4      proxy_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_epel_hv24_8_sse4  proxy_uni_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_epel_hv24_8_sse4   proxy_bi_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_epel_hv16_8_sse4      proxy_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_epel_hv16_8_sse4  proxy_uni_epel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_epel_hv16_8_sse4   proxy_bi_epel_hv8_8_sse4
 mc_rep_funcs2(epel_hv,8,  8,  4, 12, sse4);
-mc_rep_funcs(epel_hv,10,  8, 64, sse4);
-mc_rep_funcs(epel_hv,10,  8, 48, sse4);
-mc_rep_funcs(epel_hv,10,  8, 32, sse4);
-mc_rep_funcs(epel_hv,10,  8, 24, sse4);
-mc_rep_funcs(epel_hv,10,  8, 16, sse4);
+
+mc_rep_proxies(epel_hv,10,  8, sse4);
+#define ff_hevc_put_hevc_epel_hv64_10_sse4     proxy_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_hv64_10_sse4 proxy_uni_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_hv64_10_sse4  proxy_bi_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_epel_hv48_10_sse4     proxy_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_hv48_10_sse4 proxy_uni_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_hv48_10_sse4  proxy_bi_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_epel_hv32_10_sse4     proxy_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_hv32_10_sse4 proxy_uni_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_hv32_10_sse4  proxy_bi_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_epel_hv24_10_sse4     proxy_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_hv24_10_sse4 proxy_uni_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_hv24_10_sse4  proxy_bi_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_epel_hv16_10_sse4     proxy_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_epel_hv16_10_sse4 proxy_uni_epel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_epel_hv16_10_sse4  proxy_bi_epel_hv8_10_sse4
 mc_rep_funcs(epel_hv,10,  4, 12, sse4);
-mc_rep_funcs(epel_hv,12,  8, 64, sse4);
-mc_rep_funcs(epel_hv,12,  8, 48, sse4);
-mc_rep_funcs(epel_hv,12,  8, 32, sse4);
-mc_rep_funcs(epel_hv,12,  8, 24, sse4);
-mc_rep_funcs(epel_hv,12,  8, 16, sse4);
+
+mc_rep_proxies(epel_hv,12,  8, sse4);
+#define ff_hevc_put_hevc_epel_hv64_12_sse4     proxy_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_hv64_12_sse4 proxy_uni_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_hv64_12_sse4  proxy_bi_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_epel_hv48_12_sse4     proxy_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_hv48_12_sse4 proxy_uni_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_hv48_12_sse4  proxy_bi_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_epel_hv32_12_sse4     proxy_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_hv32_12_sse4 proxy_uni_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_hv32_12_sse4  proxy_bi_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_epel_hv24_12_sse4     proxy_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_hv24_12_sse4 proxy_uni_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_hv24_12_sse4  proxy_bi_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_epel_hv16_12_sse4     proxy_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_epel_hv16_12_sse4 proxy_uni_epel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_epel_hv16_12_sse4  proxy_bi_epel_hv8_12_sse4
 mc_rep_funcs(epel_hv,12,  4, 12, sse4);
 
-mc_rep_funcs(qpel_h, 8, 16, 64, sse4);
-mc_rep_funcs(qpel_h, 8, 16, 48, sse4);
-mc_rep_funcs(qpel_h, 8, 16, 32, sse4);
+mc_rep_proxies(qpel_h, 8, 16, sse4);
+#define ff_hevc_put_hevc_qpel_h64_8_sse4      proxy_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_h64_8_sse4  proxy_uni_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_h64_8_sse4   proxy_bi_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_qpel_h48_8_sse4      proxy_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_h48_8_sse4  proxy_uni_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_h48_8_sse4   proxy_bi_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_qpel_h32_8_sse4      proxy_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_h32_8_sse4  proxy_uni_qpel_h16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_h32_8_sse4   proxy_bi_qpel_h16_8_sse4
 mc_rep_funcs(qpel_h, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_h,10,  8, 64, sse4);
-mc_rep_funcs(qpel_h,10,  8, 48, sse4);
-mc_rep_funcs(qpel_h,10,  8, 32, sse4);
-mc_rep_funcs(qpel_h,10,  8, 24, sse4);
-mc_rep_funcs(qpel_h,10,  8, 16, sse4);
+
+mc_rep_proxies(qpel_h,10,  8, sse4);
+#define ff_hevc_put_hevc_qpel_h64_10_sse4     proxy_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_h64_10_sse4 proxy_uni_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_h64_10_sse4  proxy_bi_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_qpel_h48_10_sse4     proxy_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_h48_10_sse4 proxy_uni_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_h48_10_sse4  proxy_bi_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_qpel_h32_10_sse4     proxy_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_h32_10_sse4 proxy_uni_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_h32_10_sse4  proxy_bi_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_qpel_h24_10_sse4     proxy_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_h24_10_sse4 proxy_uni_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_h24_10_sse4  proxy_bi_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_qpel_h16_10_sse4     proxy_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_h16_10_sse4 proxy_uni_qpel_h8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_h16_10_sse4  proxy_bi_qpel_h8_10_sse4
 mc_rep_funcs(qpel_h,10,  4, 12, sse4);
-mc_rep_funcs(qpel_h,12,  8, 64, sse4);
-mc_rep_funcs(qpel_h,12,  8, 48, sse4);
-mc_rep_funcs(qpel_h,12,  8, 32, sse4);
-mc_rep_funcs(qpel_h,12,  8, 24, sse4);
-mc_rep_funcs(qpel_h,12,  8, 16, sse4);
+
+mc_rep_proxies(qpel_h,12,  8, sse4);
+#define ff_hevc_put_hevc_qpel_h64_12_sse4     proxy_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_h64_12_sse4 proxy_uni_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_h64_12_sse4  proxy_bi_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_qpel_h48_12_sse4     proxy_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_h48_12_sse4 proxy_uni_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_h48_12_sse4  proxy_bi_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_qpel_h32_12_sse4     proxy_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_h32_12_sse4 proxy_uni_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_h32_12_sse4  proxy_bi_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_qpel_h24_12_sse4     proxy_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_h24_12_sse4 proxy_uni_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_h24_12_sse4  proxy_bi_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_qpel_h16_12_sse4     proxy_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_h16_12_sse4 proxy_uni_qpel_h8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_h16_12_sse4  proxy_bi_qpel_h8_12_sse4
 mc_rep_funcs(qpel_h,12,  4, 12, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 64, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 48, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 32, sse4);
+
+mc_rep_proxies(qpel_v, 8, 16, sse4);
+#define ff_hevc_put_hevc_qpel_v64_8_sse4      proxy_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_v64_8_sse4  proxy_uni_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_v64_8_sse4   proxy_bi_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_qpel_v48_8_sse4      proxy_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_v48_8_sse4  proxy_uni_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_v48_8_sse4   proxy_bi_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_qpel_v32_8_sse4      proxy_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_v32_8_sse4  proxy_uni_qpel_v16_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_v32_8_sse4   proxy_bi_qpel_v16_8_sse4
 mc_rep_funcs(qpel_v, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_v,10,  8, 64, sse4);
-mc_rep_funcs(qpel_v,10,  8, 48, sse4);
-mc_rep_funcs(qpel_v,10,  8, 32, sse4);
-mc_rep_funcs(qpel_v,10,  8, 24, sse4);
-mc_rep_funcs(qpel_v,10,  8, 16, sse4);
+
+mc_rep_proxies(qpel_v,10,  8, sse4);
+#define ff_hevc_put_hevc_qpel_v64_10_sse4     proxy_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_v64_10_sse4 proxy_uni_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_v64_10_sse4  proxy_bi_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_qpel_v48_10_sse4     proxy_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_v48_10_sse4 proxy_uni_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_v48_10_sse4  proxy_bi_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_qpel_v32_10_sse4     proxy_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_v32_10_sse4 proxy_uni_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_v32_10_sse4  proxy_bi_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_qpel_v24_10_sse4     proxy_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_v24_10_sse4 proxy_uni_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_v24_10_sse4  proxy_bi_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_qpel_v16_10_sse4     proxy_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_v16_10_sse4 proxy_uni_qpel_v8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_v16_10_sse4  proxy_bi_qpel_v8_10_sse4
 mc_rep_funcs(qpel_v,10,  4, 12, sse4);
-mc_rep_funcs(qpel_v,12,  8, 64, sse4);
-mc_rep_funcs(qpel_v,12,  8, 48, sse4);
-mc_rep_funcs(qpel_v,12,  8, 32, sse4);
-mc_rep_funcs(qpel_v,12,  8, 24, sse4);
-mc_rep_funcs(qpel_v,12,  8, 16, sse4);
+
+mc_rep_proxies(qpel_v,12,  8, sse4);
+#define ff_hevc_put_hevc_qpel_v64_12_sse4     proxy_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_v64_12_sse4 proxy_uni_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_v64_12_sse4  proxy_bi_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_qpel_v48_12_sse4     proxy_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_v48_12_sse4 proxy_uni_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_v48_12_sse4  proxy_bi_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_qpel_v32_12_sse4     proxy_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_v32_12_sse4 proxy_uni_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_v32_12_sse4  proxy_bi_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_qpel_v24_12_sse4     proxy_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_v24_12_sse4 proxy_uni_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_v24_12_sse4  proxy_bi_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_qpel_v16_12_sse4     proxy_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_v16_12_sse4 proxy_uni_qpel_v8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_v16_12_sse4  proxy_bi_qpel_v8_12_sse4
 mc_rep_funcs(qpel_v,12,  4, 12, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 64, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 48, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 32, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 16, sse4);
+
+mc_rep_proxies(qpel_hv, 8, 8, sse4);
+#define ff_hevc_put_hevc_qpel_hv64_8_sse4      proxy_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv64_8_sse4  proxy_uni_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv64_8_sse4   proxy_bi_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_qpel_hv48_8_sse4      proxy_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv48_8_sse4  proxy_uni_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv48_8_sse4   proxy_bi_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_qpel_hv32_8_sse4      proxy_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv32_8_sse4  proxy_uni_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv32_8_sse4   proxy_bi_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_qpel_hv24_8_sse4      proxy_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv24_8_sse4  proxy_uni_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv24_8_sse4   proxy_bi_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_qpel_hv16_8_sse4      proxy_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv16_8_sse4  proxy_uni_qpel_hv8_8_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv16_8_sse4   proxy_bi_qpel_hv8_8_sse4
 mc_rep_funcs2(qpel_hv,8,  8,  4, 12, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 64, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 48, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 32, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 24, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 16, sse4);
+
+mc_rep_proxies(qpel_hv,10,  8, sse4);
+#define ff_hevc_put_hevc_qpel_hv64_10_sse4     proxy_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv64_10_sse4 proxy_uni_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv64_10_sse4  proxy_bi_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_qpel_hv48_10_sse4     proxy_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv48_10_sse4 proxy_uni_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv48_10_sse4  proxy_bi_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_qpel_hv32_10_sse4     proxy_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv32_10_sse4 proxy_uni_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv32_10_sse4  proxy_bi_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_qpel_hv24_10_sse4     proxy_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv24_10_sse4 proxy_uni_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv24_10_sse4  proxy_bi_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_qpel_hv16_10_sse4     proxy_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv16_10_sse4 proxy_uni_qpel_hv8_10_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv16_10_sse4  proxy_bi_qpel_hv8_10_sse4
 mc_rep_funcs(qpel_hv,10,  4, 12, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 64, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 48, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 32, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 24, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 16, sse4);
+
+mc_rep_proxies(qpel_hv,12,  8, sse4);
+#define ff_hevc_put_hevc_qpel_hv64_12_sse4     proxy_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv64_12_sse4 proxy_uni_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv64_12_sse4  proxy_bi_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_qpel_hv48_12_sse4     proxy_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv48_12_sse4 proxy_uni_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv48_12_sse4  proxy_bi_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_qpel_hv32_12_sse4     proxy_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv32_12_sse4 proxy_uni_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv32_12_sse4  proxy_bi_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_qpel_hv24_12_sse4     proxy_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv24_12_sse4 proxy_uni_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv24_12_sse4  proxy_bi_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_qpel_hv16_12_sse4     proxy_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_uni_qpel_hv16_12_sse4 proxy_uni_qpel_hv8_12_sse4
+#define ff_hevc_put_hevc_bi_qpel_hv16_12_sse4  proxy_bi_qpel_hv8_12_sse4
 mc_rep_funcs(qpel_hv,12,  4, 12, sse4);
 
 #define mc_rep_uni_w(bitd, step, W, opt) \
-- 
1.9.2.msysgit.0



More information about the ffmpeg-devel mailing list