[FFmpeg-devel] [PATCHv2 4/4] avfilter/vf_framerate: add SIMD functions for frame blending

James Almer jamrial at gmail.com
Thu Jan 18 23:16:13 EET 2018


On 1/18/2018 6:06 PM, Marton Balint wrote:
> Blend function speedups on x86_64 Core i5 4460:
> 
> ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none
> 
> C:     447548411 decicycles in Blend,    2048 runs,      0 skips
> SSSE3: 130020087 decicycles in Blend,    2048 runs,      0 skips
> AVX2:  128508221 decicycles in Blend,    2048 runs,      0 skips
> 
> ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f null none
> 
> C:     228932745 decicycles in Blend,    2048 runs,      0 skips
> SSE4:  123357781 decicycles in Blend,    2048 runs,      0 skips
> AVX2:  121215353 decicycles in Blend,    2048 runs,      0 skips
> 
> Signed-off-by: Marton Balint <cus at passwd.hu>
> ---
>  libavfilter/vf_framerate.c       |  24 ++++++-
>  libavfilter/x86/Makefile         |   1 +
>  libavfilter/x86/vf_framerate.asm | 136 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 158 insertions(+), 3 deletions(-)
>  create mode 100644 libavfilter/x86/vf_framerate.asm
> 
> diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
> index d315ef5d09..6a3b85910f 100644
> --- a/libavfilter/vf_framerate.c
> +++ b/libavfilter/vf_framerate.c
> @@ -29,11 +29,13 @@
>  #define DEBUG
>  
>  #include "libavutil/avassert.h"
> +#include "libavutil/cpu.h"
>  #include "libavutil/imgutils.h"
>  #include "libavutil/internal.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/pixelutils.h"
> +#include "libavutil/x86/cpu.h"
>  
>  #include "avfilter.h"
>  #include "internal.h"
> @@ -246,7 +248,7 @@ static int blend_frames(AVFilterContext *ctx, int interpolate)
>          av_frame_copy_props(s->work, s->f0);
>  
>          ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n");
> -        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
> +        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, outlink->h >> 2), ff_filter_get_nb_threads(ctx)));
>          return 1;
>      }
>      return 0;
> @@ -347,6 +349,11 @@ static void blend_frames_c(BLEND_FUNC_PARAMS)
>      }
>  }
>  
> +void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
> +void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);
> +void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
> +void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
> +
>  static void blend_frames16_c(BLEND_FUNC_PARAMS)
>  {
>      int line, pixel;
> @@ -371,6 +378,7 @@ static int config_input(AVFilterLink *inlink)
>      AVFilterContext *ctx = inlink->dst;
>      FrameRateContext *s = ctx->priv;
>      const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
> +    int cpu_flags = av_get_cpu_flags();
>      int plane;
>  
>      for (plane = 0; plane < 4; plane++) {
> @@ -389,10 +397,20 @@ static int config_input(AVFilterLink *inlink)
>  
>      if (s->bitdepth == 8) {
>          s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8;
> -        s->blend = blend_frames_c;
> +        if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
> +            s->blend = ff_blend_frames_avx2;
> +        else if (ARCH_X86 && EXTERNAL_SSSE3(cpu_flags))
> +            s->blend = ff_blend_frames_ssse3;
> +        else
> +            s->blend = blend_frames_c;
>      } else {
>          s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16;
> -        s->blend = blend_frames16_c;
> +        if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
> +            s->blend = ff_blend_frames16_avx2;
> +        else if (ARCH_X86 && EXTERNAL_SSE4(cpu_flags))
> +            s->blend = ff_blend_frames16_sse4;
> +        else
> +            s->blend = blend_frames16_c;

The SIMD function pointer initialization and the respective prototypes
should be in a separate file in the x86 folder. Here you should only
have something like

if (ARCH_X86)
    ff_blend_frames_init_x86(s);

Then do the corresponding pointer initialization inside that function. The
prototype for ff_blend_frames_init_x86() should be in a new header.

See how vf_blend (and many other filters) do it.

>      }
>  
>      return 0;


More information about the ffmpeg-devel mailing list