[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD
Paul B Mahol
onemda at gmail.com
Sat Dec 2 00:02:43 EET 2017
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
libavfilter/hflip.h | 38 +++++++++++++++++++++++++
libavfilter/vf_hflip.c | 30 ++++++++++++++------
libavfilter/x86/Makefile | 2 ++
libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++
libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
5 files changed, 160 insertions(+), 9 deletions(-)
create mode 100644 libavfilter/hflip.h
create mode 100644 libavfilter/x86/vf_hflip.asm
create mode 100644 libavfilter/x86/vf_hflip_init.c
diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h
new file mode 100644
index 0000000000..138380427c
--- /dev/null
+++ b/libavfilter/hflip.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2007 Benoit Fouet
+ * Copyright (c) 2010 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_HFLIP_H
+#define AVFILTER_HFLIP_H
+
+#include "avfilter.h"
+
+typedef struct FlipContext {
+ const AVClass *class;
+ int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes
+ int planewidth[4]; ///< width of each plane
+ int planeheight[4]; ///< height of each plane
+ /** per-plane row flippers: fill dst[0..w-1] from src[0], src[-1], ..., src[-(w-1)]; in this patch only assigned for planes whose max_step is 1 */
+ void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w);
+} FlipContext;
+
+void ff_hflip_init_x86(FlipContext *s, int step[4]);
+
+#endif /* AVFILTER_HFLIP_H */
diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
index cf20c193f7..65cf7c5cd1 100644
--- a/libavfilter/vf_hflip.c
+++ b/libavfilter/vf_hflip.c
@@ -29,6 +29,7 @@
#include "libavutil/opt.h"
#include "avfilter.h"
#include "formats.h"
+#include "hflip.h"
#include "internal.h"
#include "video.h"
#include "libavutil/pixdesc.h"
@@ -36,13 +37,6 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/imgutils.h"
-typedef struct FlipContext {
- const AVClass *class;
- int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes
- int planewidth[4]; ///< width of each plane
- int planeheight[4]; ///< height of each plane
-} FlipContext;
-
static const AVOption hflip_options[] = {
{ NULL }
};
@@ -67,12 +61,21 @@ static int query_formats(AVFilterContext *ctx)
return ff_set_common_formats(ctx, pix_fmts);
}
+static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w)
+{
+    int x = 0;
+    /* mirror copy: src is read at offsets 0, -1, ..., -(w - 1) while dst is filled at 0 .. w - 1 */
+    for (; x < w; x++)
+        dst[x] = src[-x];
+}
+
static int config_props(AVFilterLink *inlink)
{
FlipContext *s = inlink->dst->priv;
const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
const int hsub = pix_desc->log2_chroma_w;
const int vsub = pix_desc->log2_chroma_h;
+ int i;
av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
s->planewidth[0] = s->planewidth[3] = inlink->w;
@@ -80,6 +83,16 @@ static int config_props(AVFilterLink *inlink)
s->planeheight[0] = s->planeheight[3] = inlink->h;
s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
+ for (i = 0; i < 4; i++) {
+ switch (s->max_step[i]) {
+ case 1:
+ s->flip_line[i] = hflip_byte_c;
+ }
+ }
+
+ if (ARCH_X86)
+ ff_hflip_init_x86(s, s->max_step);
+
return 0;
}
@@ -109,8 +122,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
for (i = start; i < end; i++) {
switch (step) {
case 1:
- for (j = 0; j < width; j++)
- outrow[j] = inrow[-j];
+ s->flip_line[plane](inrow, outrow, width);
break;
case 2:
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 3431625883..1420954f62 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
+OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o
@@ -31,6 +32,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o
X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
new file mode 100644
index 0000000000..bc52a16ad8
--- /dev/null
+++ b/libavfilter/x86/vf_hflip.asm
@@ -0,0 +1,61 @@
+;*****************************************************************************
+;* x86-optimized functions for hflip filter
+;*
+;* Copyright (C) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_flip: db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ; pshufb control mask: reverses the 16 bytes of an XMM register
+
+SECTION .text
+
+INIT_XMM ssse3
+%if ARCH_X86_64
+; void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w): dst[i] = src[-i] for 0 <= i < w
+cglobal hflip_byte, 3, 5, 2, src, dst, w, x, v
+    movsxdifnidn wq, wd              ; w is a 32-bit int, sign-extend before using it as a 64-bit count
+    mova        m0, [pb_flip]
+    xor         xd, xd               ; x = number of output bytes written so far
+    sub         wq, mmsize           ; wq = largest x at which a full vector still fits
+    jl .tail                         ; row is shorter than one vector
+.loop0:
+    movu        m1, [srcq - mmsize + 1] ; the mmsize bytes ending at the current source byte
+    pshufb      m1, m0               ; reverse their order
+    movu        [dstq + xq], m1
+    sub         srcq, mmsize
+    add         xq, mmsize
+    cmp         xq, wq
+    jle .loop0
+.tail:
+    add         wq, mmsize           ; restore the real width
+    cmp         xq, wq
+    jge .done                        ; no leftover bytes (also covers w <= 0)
+.loop1:
+    mov         vb, [srcq]
+    mov         [dstq + xq], vb
+    dec         srcq
+    inc         xq
+    cmp         xq, wq
+    jl .loop1
+.done:
+    RET
+%endif
diff --git a/libavfilter/x86/vf_hflip_init.c b/libavfilter/x86/vf_hflip_init.c
new file mode 100644
index 0000000000..cd0e18f7ee
--- /dev/null
+++ b/libavfilter/x86/vf_hflip_init.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/hflip.h"
+
+void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w);
+
+av_cold void ff_hflip_init_x86(FlipContext *s, int step[4])
+{
+    int cpu_flags = av_get_cpu_flags();
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        /* ff_hflip_byte_ssse3 is only assembled under ARCH_X86_64 and only handles 1-byte pixel steps */
+        if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && step[i] == 1)
+            s->flip_line[i] = ff_hflip_byte_ssse3;
+    }
+}
--
2.11.0
More information about the ffmpeg-devel
mailing list