[FFmpeg-devel] [PATCH 2/9] lavfi/nlmeans: add SIMD-friendly assumptions for compute_safe_ssd_integral_image

Michael Niedermayer michael at niedermayer.cc
Mon May 7 01:14:37 EEST 2018


On Sun, May 06, 2018 at 01:40:53PM +0200, Clément Bœsch wrote:
> SIMD code will not have to deal with padding itself. Overwriting in that
> function may have been possible but involve large overreading of the
> sources. Instead, we simply make sure the width to process is always a
> multiple of 16. Additionally, there must be some actual area to process
> so the SIMD code can have its boundary checks after processing the first
> pixels.
> ---
>  libavfilter/vf_nlmeans.c | 25 ++++++++++++++++++-------
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
> index d222d3913e..21f981a605 100644
> --- a/libavfilter/vf_nlmeans.c
> +++ b/libavfilter/vf_nlmeans.c
> @@ -157,6 +157,9 @@ static void compute_safe_ssd_integral_image_c(uint32_t *dst, int dst_linesize_32
>  {
>      int x, y;
>  
> +    /* SIMD-friendly assumptions allowed here */
> +    av_assert2(!(w & 0xf) && w >= 16 && h >= 1);
> +
>      for (y = 0; y < h; y++) {
>          uint32_t acc = dst[-1] - dst[-dst_linesize_32 - 1];
>  
> @@ -257,9 +260,16 @@ static void compute_ssd_integral_image(uint32_t *ii, int ii_linesize_32,
>      // to compare the 2 sources pixels
>      const int startx_safe = FFMAX(s1x, s2x);
>      const int starty_safe = FFMAX(s1y, s2y);
> -    const int endx_safe   = FFMIN(s1x + w, s2x + w);
> +    const int u_endx_safe = FFMIN(s1x + w, s2x + w); // unaligned
>      const int endy_safe   = FFMIN(s1y + h, s2y + h);
>  
> +    // deduce the safe area width and height
> +    const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
> +    const int safe_ph = endy_safe - starty_safe;
> +
> +    // adjusted end x position of the safe area after width of the safe area gets aligned
> +    const int endx_safe = startx_safe + safe_pw;
> +
>      // top part where only one of s1 and s2 is still readable, or none at all
>      compute_unsafe_ssd_integral_image(ii, ii_linesize_32,
>                                        0, 0,
> @@ -273,24 +283,25 @@ static void compute_ssd_integral_image(uint32_t *ii, int ii_linesize_32,
>                                        0, starty_safe,
>                                        src, linesize,
>                                        offx, offy, e, w, h,
> -                                      startx_safe, endy_safe - starty_safe);
> +                                      startx_safe, safe_ph);
>  
>      // main and safe part of the integral
>      av_assert1(startx_safe - s1x >= 0); av_assert1(startx_safe - s1x < w);
>      av_assert1(starty_safe - s1y >= 0); av_assert1(starty_safe - s1y < h);
>      av_assert1(startx_safe - s2x >= 0); av_assert1(startx_safe - s2x < w);
>      av_assert1(starty_safe - s2y >= 0); av_assert1(starty_safe - s2y < h);
> -    compute_safe_ssd_integral_image_c(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
> -                                      src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
> -                                      src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
> -                                      endx_safe - startx_safe, endy_safe - starty_safe);
> +    if (safe_pw && safe_ph)
> +        dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
> +                                             src + (starty_safe - s1y) * linesize + (startx_safe - s1x), linesize,
> +                                             src + (starty_safe - s2y) * linesize + (startx_safe - s2x), linesize,
> +                                             safe_pw, safe_ph);


i think this is or i am missing some change

libavfilter/vf_nlmeans.c: In function ‘compute_ssd_integral_image’:
libavfilter/vf_nlmeans.c:294:9: error: ‘dsp’ undeclared (first use in this function)
         dsp->compute_safe_ssd_integral_image(ii + starty_safe*ii_linesize_32 + startx_safe, ii_linesize_32,
         ^
libavfilter/vf_nlmeans.c:294:9: note: each undeclared identifier is reported only once for each function it appears in
libavfilter/vf_nlmeans.c: At top level:
libavfilter/vf_nlmeans.c:153:13: warning: ‘compute_safe_ssd_integral_image_c’ defined but not used [-Wunused-function]
 static void compute_safe_ssd_integral_image_c(uint32_t *dst, int dst_linesize_32,
             ^
make: *** [libavfilter/vf_nlmeans.o] Error 1
make: *** Waiting for unfinished jobs....

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180507/6a2fa315/attachment.sig>


More information about the ffmpeg-devel mailing list