FFmpeg
vf_nlmeans.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Clément Bœsch <u pkh me>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <math.h>
22 #include "checkasm.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/mem.h"
26 #include "libavutil/mem_internal.h"
27 
/**
 * Fill a buffer with pseudo-random 32-bit words obtained from rnd().
 *
 * @param buf  buffer to fill; it is written through a uint32_t pointer
 * @param size buffer size in bytes; (size) / 4 words are written, so bytes
 *             past the last whole 32-bit word are left untouched
 *
 * Both parameters are parenthesized in the expansion so that expression
 * arguments (for example `a + b`) are divided and cast as a whole instead of
 * being split by operator precedence. The loop counter is a local named `i`,
 * so an argument that itself refers to a caller variable `i` would be
 * shadowed.
 */
#define randomize_buffer(buf, size) do {                \
    int i;                                              \
    for (i = 0; i < (size) / 4; i++)                    \
        ((uint32_t *)(buf))[i] = rnd();                 \
} while (0)
33 
/*
 * Body of the checkasm test for the NLMeans DSP function pointers held in
 * NLMeansDSPContext. Each section below is guarded by check_func() and feeds
 * identical inputs to call_ref() and call_new(), calls fail() when the two
 * outputs differ, and then runs bench_new().
 *
 * NOTE(review): the line carrying the function signature is not present in
 * this listing (the embedded numbering jumps from 33 to 35); presumably it is
 * `void checkasm_check_nlmeans(void)` -- confirm against the original file.
 */
35 {
36  NLMeansDSPContext dsp = {0};
37 
    /* Fixed geometry used by the integral image test. */
38  const int w = 123; // source width
39  const int h = 45; // source height
40  const int p = 3; // patch half size
41  const int r = 2; // research window half size
42 
    /* Populate the function pointers of dsp that are tested below. */
43  ff_nlmeans_init(&dsp);
44 
    /*
     * Test 1: compute_safe_ssd_integral_image.
     */
45  /* See the filter's code for the explanations on the variables */
46  if (check_func(dsp.compute_safe_ssd_integral_image, "ssd_integral_image")) {
47  int offx, offy;
        /* e is the border added on every side: patch + research half sizes. */
48  const int e = p + r;
49  const int ii_w = w + e*2;
50  const int ii_h = h + e*2;
        /* Row stride in 32-bit elements; passed as dst_linesize_32 below. */
51  const int ii_lz_32 = FFALIGN(ii_w + 1, 4);
        /*
         * Each buffer holds (ii_h + 1) rows of ii_lz_32 elements. The working
         * pointers ii_ref / ii_new skip the first row and the first column of
         * the allocation (offset ii_lz_32 + 1).
         * NOTE(review): the av_calloc() results (including src below) are used
         * without a NULL check.
         */
52  uint32_t *ii_orig_ref = av_calloc(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_ref));
53  uint32_t *ii_ref = ii_orig_ref + ii_lz_32 + 1;
54  uint32_t *ii_orig_new = av_calloc(ii_h + 1, ii_lz_32 * sizeof(*ii_orig_new));
55  uint32_t *ii_new = ii_orig_new + ii_lz_32 + 1;
        /* Source stride in bytes: the width aligned with FFALIGN(w, 16). */
56  const int src_lz = FFALIGN(w, 16);
57  uint8_t *src = av_calloc(h, src_lz);
58 
59  declare_func(void, uint32_t *dst, ptrdiff_t dst_linesize_32,
60  const uint8_t *s1, ptrdiff_t linesize1,
61  const uint8_t *s2, ptrdiff_t linesize2,
62  int w, int h);
63 
        /* Fill the whole source plane (h rows of src_lz bytes) with rnd(). */
64  randomize_buffer(src, h * src_lz);
65 
        /* Visit every offset (offx, offy) in [-r, r] x [-r, r] except (0, 0). */
66  for (offy = -r; offy <= r; offy++) {
67  for (offx = -r; offx <= r; offx++) {
68  if (offx || offy) {
                /* s1 is the fixed position (e, e); s2 is s1 shifted by the offset. */
69  const int s1x = e;
70  const int s1y = e;
71  const int s2x = e + offx;
72  const int s2y = e + offy;
                /* Rectangle covered by both w x h areas starting at s1 and s2. */
73  const int startx_safe = FFMAX(s1x, s2x);
74  const int starty_safe = FFMAX(s1y, s2y);
75  const int u_endx_safe = FFMIN(s1x + w, s2x + w);
76  const int endy_safe = FFMIN(s1y + h, s2y + h);
                /* The width is masked down to a multiple of 16. */
77  const int safe_pw = (u_endx_safe - startx_safe) & ~0xf;
78  const int safe_ph = endy_safe - starty_safe;
79 
                /*
                 * Sanity checks: the rectangle is non-empty and the start
                 * coordinates relative to s1 and s2 lie inside [0, w) x [0, h).
                 */
80  av_assert0(safe_pw && safe_ph);
81  av_assert0(startx_safe - s1x >= 0); av_assert0(startx_safe - s1x < w);
82  av_assert0(starty_safe - s1y >= 0); av_assert0(starty_safe - s1y < h);
83  av_assert0(startx_safe - s2x >= 0); av_assert0(startx_safe - s2x < w);
84  av_assert0(starty_safe - s2y >= 0); av_assert0(starty_safe - s2y < h);
85 
                /*
                 * Zero both outputs. (ii_lz_32 * ii_h - 1) elements is exactly
                 * what remains of the (ii_h + 1) * ii_lz_32 allocation after
                 * the ii_lz_32 + 1 element offset of ii_ref / ii_new.
                 */
86  memset(ii_ref, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref));
87  memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
88 
                /* Same destination offset and same two source pointers for both calls. */
89  call_ref(ii_ref + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
90  src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
91  src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
92  safe_pw, safe_ph);
93  call_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
94  src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
95  src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
96  safe_pw, safe_ph);
97 
                /*
                 * The whole zeroed range is compared, not only the rectangle
                 * handed to the functions, so differing writes anywhere in
                 * that range are reported.
                 */
98  if (memcmp(ii_ref, ii_new, (ii_lz_32 * ii_h - 1) * sizeof(*ii_ref)))
99  fail();
100 
                /* Reset the output before benchmarking with the same arguments. */
101  memset(ii_new, 0, (ii_lz_32 * ii_h - 1) * sizeof(*ii_new));
102  bench_new(ii_new + starty_safe*ii_lz_32 + startx_safe, ii_lz_32,
103  src + (starty_safe - s1y) * src_lz + (startx_safe - s1x), src_lz,
104  src + (starty_safe - s2y) * src_lz + (startx_safe - s2x), src_lz,
105  safe_pw, safe_ph);
106  }
107  }
108  }
109 
        /* Release through the original allocation pointers, not the offset ones. */
110  av_freep(&ii_orig_ref);
111  av_freep(&ii_orig_new);
112  av_freep(&src);
113  }
114 
    /*
     * Test 2: compute_weights_line.
     */
115  if (check_func(dsp.compute_weights_line, "compute_weights_line")) {
        /* TEST_W: number of initialized elements per buffer.
         * MAX_MEANINGFUL_DIFF: largest index of the weight LUT. */
116 #define TEST_W 256
117 #define MAX_MEANINGFUL_DIFF 255
        /* Only the output range [startx, endx) is compared below. */
118  const int startx = 10;
119  const int endx = 200;
120 
121  // Allocate aligned buffers on stack
        /* Every line buffer has 16 elements more than TEST_W. */
122  LOCAL_ALIGNED_32(uint32_t, iia, [TEST_W + 16]);
123  LOCAL_ALIGNED_32(uint32_t, iib, [TEST_W + 16]);
124  LOCAL_ALIGNED_32(uint32_t, iid, [TEST_W + 16]);
125  LOCAL_ALIGNED_32(uint32_t, iie, [TEST_W + 16]);
126  LOCAL_ALIGNED_32(uint8_t, src, [TEST_W + 16]);
127  LOCAL_ALIGNED_32(float, tw_ref, [TEST_W + 16]);
128  LOCAL_ALIGNED_32(float, tw_new, [TEST_W + 16]);
129  LOCAL_ALIGNED_32(float, sum_ref, [TEST_W + 16]);
130  LOCAL_ALIGNED_32(float, sum_new, [TEST_W + 16]);
131  LOCAL_ALIGNED_32(float, lut, [MAX_MEANINGFUL_DIFF + 1]);
132 
133  declare_func(void, const uint32_t *const iia,
134  const uint32_t *const iib,
135  const uint32_t *const iid,
136  const uint32_t *const iie,
137  const uint8_t *const src,
138  float *total_weight,
139  float *sum,
140  const float *const weight_lut,
141  ptrdiff_t max_meaningful_diff,
142  ptrdiff_t startx, ptrdiff_t endx);
143 
144  // Initialize LUT: weight = exp(-diff * scale)
145  // Using scale = 0.01 for testing
146  for (int i = 0; i <= MAX_MEANINGFUL_DIFF; i++)
147  lut[i] = expf(-i * 0.01f);
148 
149  // Initialize source pixels
        /* Only the first TEST_W bytes of src are written; each is rnd() & 0xff. */
150  for (int i = 0; i < TEST_W; i++)
151  src[i] = rnd() & 0xff;
152 
153  // Initialize integral images
154  // We need to ensure diff = e - d - b + a is non-negative and within range
155  // Set up as if computing real integral image values
156  for (int i = 0; i < TEST_W; i++) {
157  uint32_t base = rnd() % 1000;
158  iia[i] = base;
159  iib[i] = base + (rnd() % 100);
160  iid[i] = base + (rnd() % 100);
161  // e = a + (b - a) + (d - a) + diff
162  // So diff = e - d - b + a will be in range [0, max_meaningful_diff]
            /* The expression below reduces to iib + iid - iia + diff, hence
             * iie - iid - iib + iia == diff for every element. */
163  uint32_t diff = rnd() % (MAX_MEANINGFUL_DIFF + 1);
164  iie[i] = iia[i] + (iib[i] - iia[i]) + (iid[i] - iia[i]) + diff;
165  }
166 
167  // Clear output buffers
168  memset(tw_ref, 0, (TEST_W + 16) * sizeof(float));
169  memset(tw_new, 0, (TEST_W + 16) * sizeof(float));
170  memset(sum_ref, 0, (TEST_W + 16) * sizeof(float));
171  memset(sum_new, 0, (TEST_W + 16) * sizeof(float));
172 
        /*
         * NOTE(review): in this listing the call_ref(), call_new() and
         * bench_new() statements of this section stop after `lut,` (the
         * embedded numbers 174, 176 and 188 are missing). The remaining
         * arguments are not visible here; per declare_func() above they should
         * correspond to max_meaningful_diff, startx and endx -- confirm
         * against the original file.
         */
173  call_ref(iia, iib, iid, iie, src, tw_ref, sum_ref, lut,
175  call_new(iia, iib, iid, iie, src, tw_new, sum_new, lut,
177 
178  // Compare results with small tolerance for floating point
        /* Absolute tolerance 1e-5 for total weights, 1e-4 for sums,
         * over endx - startx elements starting at startx. */
179  if (!float_near_abs_eps_array(tw_ref + startx, tw_new + startx, 1e-5f, endx - startx))
180  fail();
181  if (!float_near_abs_eps_array(sum_ref + startx, sum_new + startx, 1e-4f, endx - startx))
182  fail();
183 
184  // Benchmark
        /* The candidate's output buffers are zeroed again before the benchmark call. */
185  memset(tw_new, 0, (TEST_W + 16) * sizeof(float));
186  memset(sum_new, 0, (TEST_W + 16) * sizeof(float));
187  bench_new(iia, iib, iid, iie, src, tw_new, sum_new, lut,
189  }
190 
    /* Report the outcome of the checks above under the name "dsp". */
191  report("dsp");
192 }
r
const char * r
Definition: vf_curves.c:127
mem_internal.h
float_near_abs_eps_array
int float_near_abs_eps_array(const float *a, const float *b, float eps, unsigned len)
Definition: checkasm.c:516
NLMeansDSPContext::compute_weights_line
void(* compute_weights_line)(const uint32_t *const iia, const uint32_t *const iib, const uint32_t *const iid, const uint32_t *const iie, const uint8_t *const src, float *total_weight, float *sum, const float *const weight_lut, ptrdiff_t max_meaningful_diff, ptrdiff_t startx, ptrdiff_t endx)
Definition: vf_nlmeans.h:30
check_func
#define check_func(func,...)
Definition: checkasm.h:206
expf
#define expf(x)
Definition: libm.h:285
base
uint8_t base
Definition: vp3data.h:128
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
call_ref
#define call_ref(...)
Definition: checkasm.h:222
TEST_W
#define TEST_W
NLMeansDSPContext
Definition: vf_nlmeans.h:25
fail
#define fail()
Definition: checkasm.h:216
checkasm.h
NLMeansDSPContext::compute_safe_ssd_integral_image
void(* compute_safe_ssd_integral_image)(uint32_t *dst, ptrdiff_t dst_linesize_32, const uint8_t *s1, ptrdiff_t linesize1, const uint8_t *s2, ptrdiff_t linesize2, int w, int h)
Definition: vf_nlmeans.h:26
avassert.h
rnd
#define rnd()
Definition: checkasm.h:199
MAX_MEANINGFUL_DIFF
#define MAX_MEANINGFUL_DIFF
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:41
thread_data::endx
int endx
Definition: vf_nlmeans.c:283
call_new
#define call_new(...)
Definition: checkasm.h:230
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
thread_data::startx
int startx
Definition: vf_nlmeans.c:282
f
f
Definition: af_crystalizer.c:122
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
vf_nlmeans_init.h
diff
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
Definition: vf_paletteuse.c:166
xf
#define xf(width, name, var, range_min, range_max, subs,...)
Definition: cbs_av1.c:622
randomize_buffer
#define randomize_buffer(buf, size)
Definition: vf_nlmeans.c:28
report
#define report
Definition: checkasm.h:219
bench_new
#define bench_new(...)
Definition: checkasm.h:421
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_nlmeans_init
static av_unused void ff_nlmeans_init(NLMeansDSPContext *dsp)
Definition: vf_nlmeans_init.h:127
checkasm_check_nlmeans
void checkasm_check_nlmeans(void)
Definition: vf_nlmeans.c:34
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
mem.h
w
uint8_t w
Definition: llvidencdsp.c:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:211
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
h
h
Definition: vp9dsp_template.c:2070
src
#define src
Definition: vp8dsp.c:248