[FFmpeg-devel] [PATCH 4/4] avfilter/vf_v360: x86 SIMD for interpolations

Paul B Mahol onemda at gmail.com
Wed Sep 4 22:28:44 EEST 2019


Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 libavfilter/v360.h             | 113 ++++++++++++++++
 libavfilter/vf_v360.c          | 236 ++++++++++++---------------------
 libavfilter/x86/Makefile       |   2 +
 libavfilter/x86/vf_v360.asm    | 104 +++++++++++++++
 libavfilter/x86/vf_v360_init.c |  43 ++++++
 5 files changed, 349 insertions(+), 149 deletions(-)
 create mode 100644 libavfilter/v360.h
 create mode 100644 libavfilter/x86/vf_v360.asm
 create mode 100644 libavfilter/x86/vf_v360_init.c

diff --git a/libavfilter/v360.h b/libavfilter/v360.h
new file mode 100644
index 0000000000..a0eefdec16
--- /dev/null
+++ b/libavfilter/v360.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 Eugene Lyapustin
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_V360_H
+#define AVFILTER_V360_H
+#include "avfilter.h"
+
+enum Projections {
+    EQUIRECTANGULAR,
+    CUBEMAP_3_2,
+    CUBEMAP_6_1,
+    EQUIANGULAR,
+    FLAT,
+    DUAL_FISHEYE,
+    BARREL,
+    CUBEMAP_1_6,
+    NB_PROJECTIONS, ///< equals the number of enumerators above
+};
+
+enum InterpMethod {
+    NEAREST,            ///< ff_v360_init() selects the remap1 row kernels
+    BILINEAR,           ///< ff_v360_init() selects the remap2 row kernels
+    BICUBIC,            ///< ff_v360_init() selects the remap4 row kernels
+    LANCZOS,            ///< ff_v360_init() selects the remap4 row kernels, same as BICUBIC
+    NB_INTERP_METHODS,  ///< equals the number of enumerators above
+};
+
+enum Faces {
+    TOP_LEFT,
+    TOP_MIDDLE,
+    TOP_RIGHT,
+    BOTTOM_LEFT,
+    BOTTOM_MIDDLE,
+    BOTTOM_RIGHT,
+    NB_FACES, ///< equals the number of enumerators above
+};
+
+enum Direction {
+    RIGHT,  ///< Axis +X
+    LEFT,   ///< Axis -X
+    UP,     ///< Axis +Y
+    DOWN,   ///< Axis -Y
+    FRONT,  ///< Axis -Z
+    BACK,   ///< Axis +Z
+    NB_DIRECTIONS, ///< equals the number of enumerators above
+};
+
+enum Rotation {
+    ROT_0,
+    ROT_90,
+    ROT_180,
+    ROT_270,
+    NB_ROTATIONS, ///< equals the number of enumerators above
+};
+
+typedef struct V360Context {
+    const AVClass *class;
+    int in, out;                ///< in: enum Projections value (switched on in config_output()); out: presumably likewise
+    int interp;                 ///< enum InterpMethod value; ff_v360_init() switches on it
+    int width, height;
+    char* in_forder;
+    char* out_forder;
+    char* in_frot;
+    char* out_frot;
+
+    int in_cubemap_face_order[6];
+    int out_cubemap_direction_order[6];
+    int in_cubemap_face_rotation[6];
+    int out_cubemap_face_rotation[6];
+
+    float in_pad, out_pad;
+
+    float yaw, pitch, roll;
+
+    int h_flip, v_flip, d_flip;
+
+    float h_fov, v_fov;
+    float flat_range[3];
+
+    int planewidth[4], planeheight[4];      ///< per-plane size of the area the slice functions write to the output frame
+    int inplanewidth[4], inplaneheight[4];  ///< presumably the matching input plane sizes (not used in the code shown here)
+    int nb_planes;                          ///< number of planes the slice functions iterate over
+
+    uint16_t *u[4], *v[4];      ///< per-plane tables of input column (u) and row (v) indices read by the remap_line kernels
+    int16_t *ker[4];            ///< per-plane tables of tap weights, indexed like u and v
+
+    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); ///< slice job; assigned outside the code shown here
+
+    void (*remap_line)(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+                       const uint16_t *u, const uint16_t *v, const int16_t *ker); ///< row kernel, called once per output row; set by ff_v360_init()
+} V360Context;
+
+void ff_v360_init(V360Context *s, int depth);     ///< pick s->remap_line from s->interp and depth
+void ff_v360_init_x86(V360Context *s, int depth); ///< may replace s->remap_line with an x86 SIMD row kernel
+
+#endif /* AVFILTER_V360_H */
diff --git a/libavfilter/vf_v360.c b/libavfilter/vf_v360.c
index fc120097d9..e69aa7e8c5 100644
--- a/libavfilter/vf_v360.c
+++ b/libavfilter/vf_v360.c
@@ -41,88 +41,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
-
-enum Projections {
-    EQUIRECTANGULAR,
-    CUBEMAP_3_2,
-    CUBEMAP_6_1,
-    EQUIANGULAR,
-    FLAT,
-    DUAL_FISHEYE,
-    BARREL,
-    CUBEMAP_1_6,
-    NB_PROJECTIONS,
-};
-
-enum InterpMethod {
-    NEAREST,
-    BILINEAR,
-    BICUBIC,
-    LANCZOS,
-    NB_INTERP_METHODS,
-};
-
-enum Faces {
-    TOP_LEFT,
-    TOP_MIDDLE,
-    TOP_RIGHT,
-    BOTTOM_LEFT,
-    BOTTOM_MIDDLE,
-    BOTTOM_RIGHT,
-    NB_FACES,
-};
-
-enum Direction {
-    RIGHT,  ///< Axis +X
-    LEFT,   ///< Axis -X
-    UP,     ///< Axis +Y
-    DOWN,   ///< Axis -Y
-    FRONT,  ///< Axis -Z
-    BACK,   ///< Axis +Z
-    NB_DIRECTIONS,
-};
-
-enum Rotation {
-    ROT_0,
-    ROT_90,
-    ROT_180,
-    ROT_270,
-    NB_ROTATIONS,
-};
-
-typedef struct V360Context {
-    const AVClass *class;
-    int in, out;
-    int interp;
-    int width, height;
-    char* in_forder;
-    char* out_forder;
-    char* in_frot;
-    char* out_frot;
-
-    int in_cubemap_face_order[6];
-    int out_cubemap_direction_order[6];
-    int in_cubemap_face_rotation[6];
-    int out_cubemap_face_rotation[6];
-
-    float in_pad, out_pad;
-
-    float yaw, pitch, roll;
-
-    int h_flip, v_flip, d_flip;
-
-    float h_fov, v_fov;
-    float flat_range[3];
-
-    int planewidth[4], planeheight[4];
-    int inplanewidth[4], inplaneheight[4];
-    int nb_planes;
-
-    uint16_t *u[4], *v[4];
-    int16_t *ker[4];
-
-    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
-} V360Context;
+#include "v360.h"
 
 typedef struct ThreadData {
     AVFrame *in;
@@ -251,47 +170,26 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-/**
- * Generate no-interpolation remapping function with a given pixel depth.
- *
- * @param bits number of bits per pixel
- * @param div number of bytes per pixel
- */
-#define DEFINE_REMAP1(bits, div)                                                             \
-static int remap1_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
-{                                                                                            \
-    ThreadData *td = (ThreadData*)arg;                                                       \
-    const V360Context *s = ctx->priv;                                                        \
-    const AVFrame *in = td->in;                                                              \
-    AVFrame *out = td->out;                                                                  \
-                                                                                             \
-    int plane, x, y;                                                                         \
-                                                                                             \
-    for (plane = 0; plane < s->nb_planes; plane++) {                                         \
-        const int in_linesize  = in->linesize[plane]  / div;                                 \
-        const int out_linesize = out->linesize[plane] / div;                                 \
-        const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane];                 \
-        uint##bits##_t *dst = (uint##bits##_t *)out->data[plane];                            \
-        const int width = s->planewidth[plane];                                              \
-        const int height = s->planeheight[plane];                                            \
-                                                                                             \
-        const int slice_start = (height *  jobnr     ) / nb_jobs;                            \
-        const int slice_end   = (height * (jobnr + 1)) / nb_jobs;                            \
-                                                                                             \
-        for (y = slice_start; y < slice_end; y++) {                                          \
-            const uint16_t *u = s->u[plane] + y * width;                                     \
-            const uint16_t *v = s->v[plane] + y * width;                                     \
-            uint##bits##_t *d = dst + y * out_linesize;                                      \
-            for (x = 0; x < width; x++)                                                      \
-                *d++ = src[v[x] * in_linesize + u[x]];                                       \
-        }                                                                                    \
-    }                                                                                        \
-                                                                                             \
-    return 0;                                                                                \
+/**
+ * Generate the no-interpolation row kernel remap1_<bits>bit_line_c(): bits is the
+ * sample width, div the bytes per sample; the ker argument is accepted but never read.
+ */
+#define DEFINE_REMAP1_LINE(bits, div)                                                                \
+static void remap1_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src,                   \
+                                      ptrdiff_t in_linesize,                                         \
+                                      const uint16_t *u, const uint16_t *v, const int16_t *ker)      \
+{                                                                                                    \
+    const ptrdiff_t stride = in_linesize / div;        /* input row pitch in samples */              \
+    const uint##bits##_t *in = (const uint##bits##_t *)src;                                          \
+    uint##bits##_t *out = (uint##bits##_t *)dst;                                                     \
+                                                                                                     \
+    for (int i = 0; i < width; i++) {                  /* copy input sample (u[i], v[i]) */          \
+        out[i] = in[v[i] * stride + u[i]];                                                           \
+    }                                                                                                \
 }
 
-DEFINE_REMAP1( 8, 1)
-DEFINE_REMAP1(16, 2)
+DEFINE_REMAP1_LINE( 8, 1) /* defines remap1_8bit_line_c()  */
+DEFINE_REMAP1_LINE(16, 2) /* defines remap1_16bit_line_c() */
 
 typedef struct XYRemap {
     uint16_t u[4][4];
@@ -304,9 +202,8 @@ typedef struct XYRemap {
  *
  * @param ws size of interpolation window
  * @param bits number of bits per pixel
- * @param div number of bytes per pixel
  */
-#define DEFINE_REMAP(ws, bits, div)                                                                        \
+#define DEFINE_REMAP(ws, bits)                                                                             \
 static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)          \
 {                                                                                                          \
     ThreadData *td = (ThreadData*)arg;                                                                     \
@@ -314,48 +211,87 @@ static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jo
     const AVFrame *in = td->in;                                                                            \
     AVFrame *out = td->out;                                                                                \
                                                                                                            \
-    int plane, x, y, i, j;                                                                                 \
-                                                                                                           \
-    for (plane = 0; plane < s->nb_planes; plane++) {                                                       \
-        const int in_linesize  = in->linesize[plane]  / div;                                               \
-        const int out_linesize = out->linesize[plane] / div;                                               \
-        const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane];                               \
-        uint##bits##_t *dst = (uint##bits##_t *)out->data[plane];                                          \
+    for (int plane = 0; plane < s->nb_planes; plane++) {                                                   \
+        const int in_linesize  = in->linesize[plane];                                                      \
+        const int out_linesize = out->linesize[plane];                                                     \
+        const uint8_t *src = in->data[plane];                                                              \
+        uint8_t *dst = out->data[plane];                                                                   \
         const int width = s->planewidth[plane];                                                            \
         const int height = s->planeheight[plane];                                                          \
                                                                                                            \
         const int slice_start = (height *  jobnr     ) / nb_jobs;                                          \
         const int slice_end   = (height * (jobnr + 1)) / nb_jobs;                                          \
                                                                                                            \
-        for (y = slice_start; y < slice_end; y++) {                                                        \
-            uint##bits##_t *d = dst + y * out_linesize;                                                    \
+        for (int y = slice_start; y < slice_end; y++) {                                                    \
             const uint16_t *u = s->u[plane] + y * width * ws * ws;                                         \
             const uint16_t *v = s->v[plane] + y * width * ws * ws;                                         \
             const int16_t *ker = s->ker[plane] + y * width * ws * ws;                                      \
-            for (x = 0; x < width; x++) {                                                                  \
-                const uint16_t *uu = u + x * ws * ws;                                                      \
-                const uint16_t *vv = v + x * ws * ws;                                                      \
-                const int16_t *kker = ker + x * ws * ws;                                                   \
-                int tmp = 0;                                                                               \
-                                                                                                           \
-                for (i = 0; i < ws; i++) {                                                                 \
-                    for (j = 0; j < ws; j++) {                                                             \
-                        tmp += kker[i * ws + j] * src[vv[i * ws + j] * in_linesize + uu[i * ws + j]];      \
-                    }                                                                                      \
-                }                                                                                          \
                                                                                                            \
-                *d++ = av_clip_uint##bits(tmp >> (15 - ws));                                               \
-            }                                                                                              \
+            s->remap_line(dst + y * out_linesize, width, src, in_linesize, u, v, ker);                     \
         }                                                                                                  \
     }                                                                                                      \
                                                                                                            \
     return 0;                                                                                              \
 }
 
-DEFINE_REMAP(2,  8, 1)
-DEFINE_REMAP(4,  8, 1)
-DEFINE_REMAP(2, 16, 2)
-DEFINE_REMAP(4, 16, 2)
+DEFINE_REMAP(1,  8) /* defines remap1_8bit_slice()  */
+DEFINE_REMAP(2,  8) /* defines remap2_8bit_slice()  */
+DEFINE_REMAP(4,  8) /* defines remap4_8bit_slice()  */
+DEFINE_REMAP(1, 16) /* defines remap1_16bit_slice() */
+DEFINE_REMAP(2, 16) /* defines remap2_16bit_slice() */
+DEFINE_REMAP(4, 16) /* defines remap4_16bit_slice() */
+
+/**
+ * Generate the interpolating row kernel remap<ws>_<bits>bit_line_c().
+ *
+ * @param ws   size of interpolation window (ws * ws taps per output sample)
+ * @param bits number of bits per pixel
+ * @param div  number of bytes per pixel
+ */
+#define DEFINE_REMAP_LINE(ws, bits, div)                                                                   \
+static void remap##ws##_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src,                    \
+                                           ptrdiff_t in_linesize,                                          \
+                                           const uint16_t *u, const uint16_t *v, const int16_t *ker)       \
+{                                                                                                          \
+    const int taps = ws * ws;               /* table entries per output sample in u, v and ker */          \
+    const ptrdiff_t stride = in_linesize / div;                  /* input row pitch in samples */          \
+    const uint##bits##_t *in = (const uint##bits##_t *)src;                                                \
+    uint##bits##_t *out = (uint##bits##_t *)dst;                                                           \
+                                                                                                           \
+    for (int x = 0; x < width; x++, u += taps, v += taps, ker += taps) {                                   \
+        int acc = 0;                                                                                       \
+        /* weighted sum over the window, taps visited in table order */                                    \
+        for (int k = 0; k < taps; k++)                                                                     \
+            acc += ker[k] * in[v[k] * stride + u[k]];                                                      \
+        out[x] = av_clip_uint##bits(acc >> (15 - ws));                                                     \
+    }                                                                                                      \
+}
+
+DEFINE_REMAP_LINE(2,  8, 1) /* defines remap2_8bit_line_c()  */
+DEFINE_REMAP_LINE(4,  8, 1) /* defines remap4_8bit_line_c()  */
+DEFINE_REMAP_LINE(2, 16, 2) /* defines remap2_16bit_line_c() */
+DEFINE_REMAP_LINE(4, 16, 2) /* defines remap4_16bit_line_c() */
+
+void ff_v360_init(V360Context *s, int depth)
+{
+    const int wide = depth > 8; /* more than 8 bits per sample: use the 16-bit C kernels */
+
+    switch (s->interp) {
+    case NEAREST:
+        s->remap_line = wide ? remap1_16bit_line_c : remap1_8bit_line_c;
+        break;
+    case BILINEAR:
+        s->remap_line = wide ? remap2_16bit_line_c : remap2_8bit_line_c;
+        break;
+    case BICUBIC: /* both methods with a window size of 4 share one kernel */
+    case LANCZOS:
+        s->remap_line = wide ? remap4_16bit_line_c : remap4_8bit_line_c;
+        break;
+    }
+
+    if (ARCH_X86_64) /* may replace the C kernel chosen above */
+        ff_v360_init_x86(s, depth);
+}
 
 /**
  * Save nearest pixel coordinates for remapping.
@@ -2038,6 +1974,8 @@ static int config_output(AVFilterLink *outlink)
         av_assert0(0);
     }
 
+    ff_v360_init(s, depth);
+
     switch (s->in) {
     case EQUIRECTANGULAR:
         in_transform = xyz_to_equirect;
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 8dc0b0e6d4..f12993e606 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_TBLEND_FILTER)                 += x86/vf_blend_init.o
 OBJS-$(CONFIG_THRESHOLD_FILTER)              += x86/vf_threshold_init.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
+OBJS-$(CONFIG_V360_FILTER)                   += x86/vf_v360_init.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 
@@ -66,5 +67,6 @@ X86ASM-OBJS-$(CONFIG_TBLEND_FILTER)          += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER)       += x86/vf_threshold.o
 X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER)      += x86/vf_interlace.o
 X86ASM-OBJS-$(CONFIG_VOLUME_FILTER)          += x86/af_volume.o
+X86ASM-OBJS-$(CONFIG_V360_FILTER)            += x86/vf_v360.o
 X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER)          += x86/vf_w3fdif.o
 X86ASM-OBJS-$(CONFIG_YADIF_FILTER)           += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
new file mode 100644
index 0000000000..46142a3bad
--- /dev/null
+++ b/libavfilter/x86/vf_v360.asm
@@ -0,0 +1,104 @@
+;*****************************************************************************
+;* x86-optimized functions for v360 filter
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+ALIGN 32
+
+pb_mask: db 0,4,8,12,5,5,5,5,5,5,5,5,5,5,5,5 ; vpshufb control: byte 0 of each dword moved to the low 4 bytes
+pd_255: times 4 dd 255                       ; dword mask keeping only the low byte (used by remap2)
+pb_255: times 16 db 255                      ; all-ones source for the vpgatherdd mask register
+
+SECTION .text
+
+; void ff_remap{1,2}_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+;                                   const uint16_t *u, const uint16_t *v, const int16_t *ker);
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal remap1_8bit_line, 6, 7, 7, dst, width, src, in_linesize, u, v, x ; ker (7th arg) is unused, so only 6 args are loaded
+    movsxdifnidn widthq, widthd
+    movsxdifnidn in_linesizeq, in_linesized
+    xor             xq, xq                    ; x = 0
+    movd           xm0, in_linesized
+    VBROADCASTI128  m4, [pb_255]              ; all-ones template for the gather mask
+    VBROADCASTI128  m6, [pb_mask]
+    vpbroadcastd    m0, xm0                   ; m0 = in_linesize in every dword
+
+    .loop:                                    ; 8 output pixels per iteration, no separate tail handling
+        vpmovzxwd   m1, [vq + xq * 2]         ; v/u are uint16_t: zero-extend, as the C kernel does
+        vpmovzxwd   m2, [uq + xq * 2]
+
+        vpmulld          m3, m1, m0
+        vpaddd           m1, m3, m2           ; byte offset = v * in_linesize + u
+        mova             m2, m4               ; vpgatherdd clears its mask register, so reload it
+        vpgatherdd       m5, [srcq + m1], m2  ; loads a dword at each offset; only byte 0 is the pixel
+        vextracti128    xm3, m5, 1
+        vpshufb          m1, m5, m6           ; pixels 0-3 packed into the low dword
+        vpshufb          m2, m3, m6           ; pixels 4-7 packed into the low dword
+        movd      [dstq+xq], xm1
+        movd    [dstq+xq+4], xm2
+
+        add   xq, mmsize / 4
+        cmp   xq, widthq
+        jl .loop
+    RET
+
+INIT_YMM avx2
+cglobal remap2_8bit_line, 7, 9, 9, dst, width, src, in_linesize, u, v, ker, x, temp
+    movsxdifnidn widthq, widthd
+    movsxdifnidn in_linesizeq, in_linesized
+    xor             xq, xq                    ; x = 0
+    movd           xm0, in_linesized
+    VBROADCASTI128  m7, [pb_255]              ; all-ones template for the gather mask
+    vpbroadcastd    m0, xm0                   ; m0 = in_linesize in every dword
+    movd           xm6, [pd_255]
+    vpbroadcastd    m6, xm6                   ; m6 = 0x000000ff in every dword
+
+    .loop:                                    ; 2 output pixels (4 taps each) per iteration
+        vpmovsxwd  m1, [kerq + xq * 8]        ; weights are int16_t: sign-extend
+        vpmovzxwd  m2, [vq + xq * 8]          ; v/u are uint16_t: zero-extend, as the C kernel does
+        vpmovzxwd  m3, [uq + xq * 8]
+
+        vpmulld         m4, m2, m0
+        vpaddd          m4, m3                ; byte offset = v * in_linesize + u
+        mova            m3, m7                ; vpgatherdd clears its mask register, so reload it
+        vpgatherdd      m5, [srcq + m4], m3
+        vpand           m5, m6                ; keep byte 0 of each gathered dword
+        vpmulld         m5, m1                ; sample * weight
+        vphaddd         m2, m5, m5
+        vphaddd         m5, m2, m2            ; each 128-bit lane now holds the sum of its 4 products
+        vpsrld          m5, m5, 0xd           ; >> 13, i.e. (15 - ws) of the C kernel with ws = 2
+        vextracti128   xm3, m5, 1
+        vpshufb         m5, m5, [pb_mask]     ; NOTE(review): low byte is stored without the C kernel's av_clip_uint8()
+        vpshufb         m3, m3, [pb_mask]
+
+        movd          tempd, xm5
+        mov       [dstq+xq], tempb            ; pixel x
+        movd          tempd, xm3
+        mov     [dstq+xq+1], tempb            ; pixel x + 1
+
+        add   xq, mmsize / 16
+        cmp   xq, widthq
+        jl .loop
+    RET
+%endif
diff --git a/libavfilter/x86/vf_v360_init.c b/libavfilter/x86/vf_v360_init.c
new file mode 100644
index 0000000000..e48ee307b3
--- /dev/null
+++ b/libavfilter/x86/vf_v360_init.c
@@ -0,0 +1,43 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/v360.h"
+
+void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+                              const uint16_t *u, const uint16_t *v, const int16_t *ker); /* implemented in x86/vf_v360.asm */
+
+void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
+                              const uint16_t *u, const uint16_t *v, const int16_t *ker); /* implemented in x86/vf_v360.asm */
+
+av_cold void ff_v360_init_x86(V360Context *s, int depth)
+{
+#if ARCH_X86_64
+    const int flags = av_get_cpu_flags();
+    if (EXTERNAL_AVX2(flags) && depth <= 8) { /* AVX2 row kernels cover 8-bit samples only */
+        if (s->interp == NEAREST)
+            s->remap_line = ff_remap1_8bit_line_avx2;
+        else if (s->interp == BILINEAR)
+            s->remap_line = ff_remap2_8bit_line_avx2;
+    }
+#endif
+}
-- 
2.17.1



More information about the ffmpeg-devel mailing list