FFmpeg
hevc_deblock.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <string.h>
20 
21 #include "libavutil/intreadwrite.h"
22 #include "libavutil/macros.h"
23 #include "libavutil/mem_internal.h"
24 
25 #include "libavcodec/hevc/dsp.h"
26 
27 #include "checkasm.h"
28 
/* Per-32-bit-word sample masks, indexed by (bit_depth - 8) / 2: 8, 10, 12 bit. */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };

/* Bytes per sample for the bit_depth that is in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define BUF_STRIDE (16 * 2)
#define BUF_LINES (16)
// large buffer sizes based on high bit depth
#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)

/*
 * Write the same masked random 32-bit words into buf0 and buf1, four bytes
 * at a time, for `size` bytes. Expects `bit_depth` to be in scope.
 */
#define randomize_buffers(buf0, buf1, size)                         \
    do {                                                            \
        uint32_t sample_mask = pixel_mask[(bit_depth - 8) >> 1];    \
        int off;                                                    \
        for (off = 0; off < size; off += 4) {                       \
            uint32_t word = rnd() & sample_mask;                    \
            AV_WN32A(buf0 + off, word);                             \
            AV_WN32A(buf1 + off, word);                             \
        }                                                           \
    } while (0)
48 
50 {
51  // see tctable[] in hevc_filter.c, we check full range
52  int32_t tc[2] = { rnd() % 25, rnd() % 25 };
53  // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c
54  // variant) functions, see deblocking_filter_CTB() in hevc_filter.c
55  uint8_t no_p[2] = { rnd() & c, rnd() & c };
56  uint8_t no_q[2] = { rnd() & c, rnd() & c };
57  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
58  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
59 
60  declare_func(void, uint8_t *pix, ptrdiff_t stride,
61  const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
62 
63  if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
64  "hevc_h_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))
65  {
66  randomize_buffers(buf0, buf1, BUF_SIZE);
67 
68  call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
69  call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
70  if (memcmp(buf0, buf1, BUF_SIZE))
71  fail();
72  bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
73  }
74 
75  if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
76  "hevc_v_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))
77  {
78  randomize_buffers(buf0, buf1, BUF_SIZE);
79 
80  call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
81  call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
82  if (memcmp(buf0, buf1, BUF_SIZE))
83  fail();
84  bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);
85  }
86 }
87 
/*
 * Byte-addressed samples on either side of the edge at `buf`. P0/Q0 touch the
 * edge, P3/Q3 are furthest away. `xstride` is the byte distance between
 * neighbouring samples across the edge. Both names must be in scope.
 */
#define P3 buf[-4 * xstride]
#define P2 buf[-3 * xstride]
#define P1 buf[-2 * xstride]
#define P0 buf[-1 * xstride]
#define Q0 buf[0 * xstride]
#define Q1 buf[1 * xstride]
#define Q2 buf[2 * xstride]
#define Q3 buf[3 * xstride]

/* (tc * 5 + 1) >> 1 for tc[x]; expects an array named `tc` in scope. */
#define TC25(x) ((tc[(x)] * 5 + 1) >> 1)
/* Reduce x to the low bit_depth bits. The argument is parenthesised so that
 * compound expressions (ternaries, |, +) are masked as a whole. */
#define MASK(x) (uint16_t)((x) & ((1 << (bit_depth)) - 1))
/* Read the sample that starts at lvalue x as 8 or 16 bit, per SIZEOF_PIXEL. */
#define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t*)(&(x)) : *(uint16_t*)(&(x)))
/* Store y, masked to bit_depth bits, into the sample that starts at lvalue x. */
#define SET(x, y) do { \
    uint16_t z = MASK(y); \
    if (SIZEOF_PIXEL == 1) \
        *(uint8_t*)(&(x)) = z; \
    else \
        *(uint16_t*)(&(x)) = z; \
} while (0)
/*
 * Sample x lowered by diff (clipped to the valid sample range) plus a random
 * offset in [0, max(2 * diff, 1)). The sum may exceed the sample range; SET()
 * masks it. Wrapped in parentheses so it can be used inside larger expressions.
 */
#define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
    (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))
109 
110 // NOTE: this function doesn't work 'correctly' in that it won't always choose
111 // strong/strong or weak/weak, in most cases it tends to but will sometimes mix
112 // weak/strong or even skip sometimes. This is more useful to test correctness
113 // for these functions, though it does make benching them difficult. The easiest
114 // way to bench these functions is to check an overall decode since there are too
115 // many paths and ways to trigger the deblock: we would have to bench all
116 // permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
117 // too much.
118 static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
119  uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
120 {
121  int i, j, b3, tc25, tc25diff, b3diff;
122  // both tc & beta are unscaled inputs
123  // minimum useful value is 1, full range 0-24
124  tc[0] = (rnd() % 25) + 1;
125  tc[1] = (rnd() % 25) + 1;
126  // minimum useful value for 8bit is 8
127  *beta = (rnd() % 57) + 8;
128 
129  switch (type) {
130  case 0: // strong
131  for (j = 0; j < 2; j++) {
132  tc25 = TC25(j) << (bit_depth - 8);
133  tc25diff = FFMAX(tc25 - 1, 0);
134  // 4 lines per tc
135  for (i = 0; i < 4; i++) {
136  b3 = (*beta << (bit_depth - 8)) >> 3;
137 
138  SET(P0, rnd() % (1 << bit_depth));
139  SET(Q0, RANDCLIP(P0, tc25diff));
140 
141  // p3 - p0 up to beta3 budget
142  b3diff = rnd() % b3;
143  SET(P3, RANDCLIP(P0, b3diff));
144  // q3 - q0, reduced budget
145  b3diff = rnd() % FFMAX(b3 - b3diff, 1);
146  SET(Q3, RANDCLIP(Q0, b3diff));
147 
148  // same concept, budget across 4 pixels
149  b3 -= b3diff = rnd() % FFMAX(b3, 1);
150  SET(P2, RANDCLIP(P0, b3diff));
151  b3 -= b3diff = rnd() % FFMAX(b3, 1);
152  SET(Q2, RANDCLIP(Q0, b3diff));
153 
154  // extra reduced budget for weighted pixels
155  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
156  SET(P1, RANDCLIP(P0, b3diff));
157  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
158  SET(Q1, RANDCLIP(Q0, b3diff));
159 
160  buf += ystride;
161  }
162  }
163  break;
164  case 1: // weak
165  for (j = 0; j < 2; j++) {
166  tc25 = TC25(j) << (bit_depth - 8);
167  tc25diff = FFMAX(tc25 - 1, 0);
168  // 4 lines per tc
169  for (i = 0; i < 4; i++) {
170  // Weak filtering is signficantly simpler to activate as
171  // we only need to satisfy d0 + d3 < beta, which
172  // can be simplified to d0 + d0 < beta. Using the above
173  // derivations but substiuting b3 for b1 and ensuring
174  // that P0/Q0 are at least 1/2 tc25diff apart (tending
175  // towards 1/2 range).
176  b3 = (*beta << (bit_depth - 8)) >> 1;
177 
178  SET(P0, rnd() % (1 << bit_depth));
179  SET(Q0, RANDCLIP(P0, tc25diff >> 1) +
180  (tc25diff >> 1) * (P0 < (1 << (bit_depth - 1))) ? 1 : -1);
181 
182  // p3 - p0 up to beta3 budget
183  b3diff = rnd() % b3;
184  SET(P3, RANDCLIP(P0, b3diff));
185  // q3 - q0, reduced budget
186  b3diff = rnd() % FFMAX(b3 - b3diff, 1);
187  SET(Q3, RANDCLIP(Q0, b3diff));
188 
189  // same concept, budget across 4 pixels
190  b3 -= b3diff = rnd() % FFMAX(b3, 1);
191  SET(P2, RANDCLIP(P0, b3diff));
192  b3 -= b3diff = rnd() % FFMAX(b3, 1);
193  SET(Q2, RANDCLIP(Q0, b3diff));
194 
195  // extra reduced budget for weighted pixels
196  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
197  SET(P1, RANDCLIP(P0, b3diff));
198  b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
199  SET(Q1, RANDCLIP(Q0, b3diff));
200 
201  buf += ystride;
202  }
203  }
204  break;
205  case 2: // none
206  *beta = 0; // ensure skip
207  for (i = 0; i < 8; i++) {
208  // we can just fill with completely random data, nothing should be touched.
209  SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
210  SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
211  buf += ystride;
212  }
213  break;
214  }
215 }
216 
218 {
219  const char *type;
220  const char *types[3] = { "strong", "weak", "skip" };
221  int beta;
222  int32_t tc[2] = {0};
223  uint8_t no_p[2] = { rnd() & c, rnd() & c };
224  uint8_t no_q[2] = { rnd() & c, rnd() & c };
225  LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
226  LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
227  uint8_t *ptr0 = buf0 + BUF_OFFSET,
228  *ptr1 = buf1 + BUF_OFFSET;
229 
230  declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,
231  const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
232  memset(buf0, 0, BUF_SIZE);
233 
234  for (int j = 0; j < 3; j++) {
235  type = types[j];
236  if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
237  "hevc_h_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))
238  {
240  memcpy(buf1, buf0, BUF_SIZE);
241 
242  call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
243  call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
244  if (memcmp(buf0, buf1, BUF_SIZE))
245  fail();
246  bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
247  }
248 
249  if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
250  "hevc_v_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))
251  {
253  memcpy(buf1, buf0, BUF_SIZE);
254 
255  call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
256  call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
257  if (memcmp(buf0, buf1, BUF_SIZE))
258  fail();
259  bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);
260  }
261  }
262 }
263 
265 {
267  int bit_depth;
268  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
271  }
272  report("chroma");
273  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
276  }
277  report("chroma_full");
278  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
281  }
282  report("luma");
283  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
286  }
287  report("luma_full");
288 }
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: hevc_deblock.c:31
ff_hevc_dsp_init
void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
Definition: dsp.c:128
SET
#define SET(x, y)
Definition: hevc_deblock.c:100
mem_internal.h
check_func
#define check_func(func,...)
Definition: checkasm.h:175
Q2
#define Q2
Definition: hevc_deblock.c:94
Q3
#define Q3
Definition: hevc_deblock.c:95
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
call_ref
#define call_ref(...)
Definition: checkasm.h:190
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
macros.h
fail
#define fail()
Definition: checkasm.h:184
checkasm.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
rnd
#define rnd()
Definition: checkasm.h:168
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:2037
intreadwrite.h
Q0
#define Q0
Definition: hevc_deblock.c:92
BUF_STRIDE
#define BUF_STRIDE
Definition: hevc_deblock.c:32
P2
#define P2
Definition: hevc_deblock.c:89
randomize_buffers
#define randomize_buffers(buf0, buf1, size)
Definition: hevc_deblock.c:38
P1
#define P1
Definition: hevc_deblock.c:90
call_new
#define call_new(...)
Definition: checkasm.h:293
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
P3
#define P3
Definition: hevc_deblock.c:88
BUF_SIZE
#define BUF_SIZE
Definition: hevc_deblock.c:36
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
check_deblock_luma
static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
Definition: hevc_deblock.c:217
TC25
#define TC25(x)
Definition: hevc_deblock.c:97
dsp.h
HEVCDSPContext
Definition: dsp.h:47
report
#define report
Definition: checkasm.h:187
bench_new
#define bench_new(...)
Definition: checkasm.h:364
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
P0
#define P0
Definition: hevc_deblock.c:91
randomize_luma_buffers
static void randomize_luma_buffers(int type, int *beta, int32_t tc[2], uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
Definition: hevc_deblock.c:118
stride
#define stride
Definition: h264pred_template.c:537
pixel_mask
static const uint32_t pixel_mask[3]
Definition: hevc_deblock.c:29
checkasm_check_hevc_deblock
void checkasm_check_hevc_deblock(void)
Definition: hevc_deblock.c:264
tc
#define tc
Definition: regdef.h:69
check_deblock_chroma
static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
Definition: hevc_deblock.c:49
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:179
BUF_OFFSET
#define BUF_OFFSET
Definition: hevc_deblock.c:35
int32_t
int32_t
Definition: audioconvert.c:56
h
h
Definition: vp9dsp_template.c:2038
RANDCLIP
#define RANDCLIP(x, diff)
Definition: hevc_deblock.c:107
Q1
#define Q1
Definition: hevc_deblock.c:93