[FFmpeg-devel] [PATCH 4/4] vf_ssim: x86 simd for ssim_4x4xN and ssim_endN.

Paul B Mahol onemda at gmail.com
Mon Jul 13 00:09:58 CEST 2015


On 7/11/15, Ronald S. Bultje <rsbultje at gmail.com> wrote:
> Both are 2-2.5x faster than their C counterpart.
> ---
>  libavfilter/ssim.h             |  36 ++++++++
>  libavfilter/vf_ssim.c          |  26 ++++--
>  libavfilter/x86/Makefile       |   2 +
>  libavfilter/x86/vf_ssim.asm    | 190
> +++++++++++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_ssim_init.c |  38 +++++++++
>  5 files changed, 283 insertions(+), 9 deletions(-)
>  create mode 100644 libavfilter/ssim.h
>  create mode 100644 libavfilter/x86/vf_ssim.asm
>  create mode 100644 libavfilter/x86/vf_ssim_init.c
>
> diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h
> new file mode 100644
> index 0000000..cd3a6ee
> --- /dev/null
> +++ b/libavfilter/ssim.h
> @@ -0,0 +1,36 @@
> +/*
> + * Copyright (c) 2015 Ronald S. Bultje <rsbultje at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#ifndef LIBAVFILTER_SSIM_H
> +#define LIBAVFILTER_SSIM_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +typedef struct SSIMDSPContext {
> +    void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
> +                          const uint8_t *ref, ptrdiff_t ref_stride,
> +                          int (*sums)[4], int w);
> +    float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int
> w);
> +} SSIMDSPContext;
> +
> +void ff_ssim_init_x86(SSIMDSPContext *dsp);
> +
> +#endif /* LIBAVFILTER_SSIM_H */
> diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
> index f7a259e..b5a61ee 100644
> --- a/libavfilter/vf_ssim.c
> +++ b/libavfilter/vf_ssim.c
> @@ -42,6 +42,7 @@
>  #include "drawutils.h"
>  #include "formats.h"
>  #include "internal.h"
> +#include "ssim.h"
>  #include "video.h"
>
>  typedef struct SSIMContext {
> @@ -59,6 +60,7 @@ typedef struct SSIMContext {
>      int planeheight[4];
>      int *temp;
>      int is_rgb;
> +    SSIMDSPContext dsp;
>  } SSIMContext;
>
>  #define OFFSET(x) offsetof(SSIMContext, x)
> @@ -85,8 +87,8 @@ static void set_meta(AVDictionary **metadata, const char
> *key, char comp, float
>      }
>  }
>
> -static void ssim_4x4xn(const uint8_t *main, int main_stride,
> -                       const uint8_t *ref, int ref_stride,
> +static void ssim_4x4xn(const uint8_t *main, ptrdiff_t main_stride,
> +                       const uint8_t *ref, ptrdiff_t ref_stride,
>                         int (*sums)[4], int width)
>  {
>      int x, y, z;
> @@ -132,7 +134,7 @@ static float ssim_end1(int s1, int s2, int ss, int s12)
>           / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars +
> ssim_c2));
>  }
>
> -static float ssim_endn(int (*sum0)[4], int (*sum1)[4], int width)
> +static float ssim_endn(const int (*sum0)[4], const int (*sum1)[4], int
> width)
>  {
>      float ssim = 0.0;
>      int i;
> @@ -145,7 +147,8 @@ static float ssim_endn(int (*sum0)[4], int (*sum1)[4],
> int width)
>      return ssim;
>  }
>
> -static float ssim_plane(uint8_t *main, int main_stride,
> +static float ssim_plane(SSIMDSPContext *dsp,
> +                        uint8_t *main, int main_stride,
>                          uint8_t *ref, int ref_stride,
>                          int width, int height, void *temp)
>  {
> @@ -160,12 +163,12 @@ static float ssim_plane(uint8_t *main, int
> main_stride,
>      for (y = 1; y < height; y++) {
>          for (; z <= y; z++) {
>              FFSWAP(void*, sum0, sum1);
> -            ssim_4x4xn(&main[4 * z * main_stride], main_stride,
> -                       &ref[4 * z * ref_stride], ref_stride,
> -                       sum0, width);
> +            dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride,
> +                               &ref[4 * z * ref_stride], ref_stride,
> +                               sum0, width);
>          }
>
> -        ssim += ssim_endn(sum0, sum1, width - 1);
> +        ssim += dsp->ssim_end_line(sum0, sum1, width - 1);
>      }
>
>      return ssim / ((height - 1) * (width - 1));
> @@ -187,7 +190,7 @@ static AVFrame *do_ssim(AVFilterContext *ctx, AVFrame
> *main,
>      s->nb_frames++;
>
>      for (i = 0; i < s->nb_components; i++) {
> -        c[i] = ssim_plane(main->data[i], main->linesize[i],
> +        c[i] = ssim_plane(&s->dsp, main->data[i], main->linesize[i],
>                            ref->data[i], ref->linesize[i],
>                            s->planewidth[i], s->planeheight[i], s->temp);
>          ssimv += s->coefs[i] * c[i];
> @@ -294,6 +297,11 @@ static int config_input_ref(AVFilterLink *inlink)
>      if (!s->temp)
>          return AVERROR(ENOMEM);
>
> +    s->dsp.ssim_4x4_line = ssim_4x4xn;
> +    s->dsp.ssim_end_line = ssim_endn;
> +    if (ARCH_X86)
> +        ff_ssim_init_x86(&s->dsp);
> +
>      return 0;
>  }
>
> diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> index 89d3ca1..230e879 100644
> --- a/libavfilter/x86/Makefile
> +++ b/libavfilter/x86/Makefile
> @@ -9,6 +9,7 @@ OBJS-$(CONFIG_PP7_FILTER)                    +=
> x86/vf_pp7_init.o
>  OBJS-$(CONFIG_PSNR_FILTER)                   += x86/vf_psnr_init.o
>  OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
>  OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
> +OBJS-$(CONFIG_SSIM_FILTER)                   += x86/vf_ssim_init.o
>  OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
>  OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
>  OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
> @@ -21,6 +22,7 @@ YASM-OBJS-$(CONFIG_INTERLACE_FILTER)         +=
> x86/vf_interlace.o
>  YASM-OBJS-$(CONFIG_PP7_FILTER)               += x86/vf_pp7.o
>  YASM-OBJS-$(CONFIG_PSNR_FILTER)              += x86/vf_psnr.o
>  YASM-OBJS-$(CONFIG_PULLUP_FILTER)            += x86/vf_pullup.o
> +YASM-OBJS-$(CONFIG_SSIM_FILTER)              += x86/vf_ssim.o
>  YASM-OBJS-$(CONFIG_TINTERLACE_FILTER)        += x86/vf_interlace.o
>  YASM-OBJS-$(CONFIG_VOLUME_FILTER)            += x86/af_volume.o
>  YASM-OBJS-$(CONFIG_YADIF_FILTER)             += x86/vf_yadif.o
> x86/yadif-16.o x86/yadif-10.o
> diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
> new file mode 100644
> index 0000000..55bb645
> --- /dev/null
> +++ b/libavfilter/x86/vf_ssim.asm
> @@ -0,0 +1,190 @@
> +;*****************************************************************************
> +;* x86-optimized functions for interlace filter

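Apart from the header comment quoted just above (it says "interlace filter" in the new vf_ssim.asm, presumably a copy-paste leftover that should read "ssim filter"), the patch LGTM, unless someone has comments on the asm part.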


More information about the ffmpeg-devel mailing list