FFmpeg
vvc_alf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024 Nuo Mi <nuomi2021@gmail.com>
3  * Copyright (c) 2023-2024 Wu Jianhua <toqsxw@outlook.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 
24 #include "checkasm.h"
25 #include "libavcodec/vvc/ctu.h"
26 #include "libavcodec/vvc/data.h"
27 #include "libavcodec/vvc/dsp.h"
28 
29 #include "libavutil/common.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mem_internal.h"
32 
/*
 * Masks applied to every random 32-bit word written into the pixel buffers.
 * They are indexed with (bit_depth - 8) >> 1, so for the bit depths 8, 10
 * and 12 entry 0 keeps every bit while entries 1 and 2 keep the low 10 and
 * 12 bits of each 16-bit half.
 */
static const uint32_t pixel_mask[3] = {
    0xffffffff,
    0x03ff03ff,
    0x0fff0fff,
};

/* Bytes per pixel; expects a variable named bit_depth at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)

/* Row pitches of the source and destination test buffers, in pixels. */
#define SRC_PIXEL_STRIDE (MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE)
#define DST_PIXEL_STRIDE (SRC_PIXEL_STRIDE + 4)

/*
 * Buffer sizes in bytes: one CTU worth of rows plus 3 extra rows above and
 * 3 below, with a trailing factor of 2 to leave room for high bit depths.
 */
#define SRC_BUF_SIZE (2 * (MAX_CTU_SIZE + 2 * 3) * SRC_PIXEL_STRIDE)
#define DST_BUF_SIZE (2 * (MAX_CTU_SIZE + 2 * 3) * DST_PIXEL_STRIDE)

/* Number of int16_t entries in the filter and clip parameter arrays. */
#define LUMA_PARAMS_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * ALF_NUM_COEFF_LUMA)
41 
/*
 * Fill buf0 and buf1 with the same pseudo-random pixel data.
 *
 * buf0, buf1: byte pointers to the two buffers; both receive identical data
 *             so that a reference and a tested implementation see the same
 *             input.
 * size:       number of bytes to fill; data is written 4 bytes at a time.
 *
 * Expects a variable named bit_depth in the caller's scope; it selects the
 * mask that keeps the random samples within the valid range.
 * Every argument is parenthesised in the expansion so that compound
 * expressions (e.g. "cond ? a : b" as size) expand correctly.
 */
#define randomize_buffers(buf0, buf1, size)                         \
    do {                                                            \
        const uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];     \
        for (int k = 0; k < (size); k += 4) {                       \
            const uint32_t r = rnd() & mask;                        \
            AV_WN32A((buf0) + k, r);                                \
            AV_WN32A((buf1) + k, r);                                \
        }                                                           \
    } while (0)
52 
/*
 * Fill an ALF parameter array with pseudo-random values.
 *
 * buf:    array to fill.
 * size:   number of elements to write.
 * filter: non-zero -> each element is a random value truncated to int8_t
 *         (used for filter coefficients);
 *         zero     -> each element is picked at random from clip_set
 *         (used for clipping values).
 *
 * Expects an array named clip_set in the caller's scope.
 * buf and size are parenthesised in the expansion so that compound
 * expressions passed as arguments expand correctly.
 */
#define randomize_buffers2(buf, size, filter)                       \
    do {                                                            \
        if (filter) {                                               \
            for (int k = 0; k < (size); k++) {                      \
                const int8_t r = rnd();                             \
                (buf)[k] = r;                                       \
            }                                                       \
        } else {                                                    \
            for (int k = 0; k < (size); k++) {                      \
                const int r = rnd() % FF_ARRAY_ELEMS(clip_set);     \
                (buf)[k] = clip_set[r];                             \
            }                                                       \
        }                                                           \
    } while (0)
68 
69 static int get_alf_vb_pos(const int h, const int vb_pos_above)
70 {
71  if (h == MAX_CTU_SIZE)
72  return MAX_CTU_SIZE - vb_pos_above;
73  // If h < MAX_CTU_SIZE and picture virtual boundaries are involved, ALF virtual boundaries can either be within or outside this ALF block.
74  return ((rnd() & 1) ? h : MAX_CTU_SIZE) - vb_pos_above;
75 }
76 
77 static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
78 {
79  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
80  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
81  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
82  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
83  int16_t filter[LUMA_PARAMS_SIZE];
84  int16_t clip[LUMA_PARAMS_SIZE];
85 
86  const int16_t clip_set[] = {
87  1 << bit_depth, 1 << (bit_depth - 3), 1 << (bit_depth - 5), 1 << (bit_depth - 7)
88  };
89 
90  ptrdiff_t src_stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
91  ptrdiff_t dst_stride = DST_PIXEL_STRIDE * SIZEOF_PIXEL;
92  int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
93 
94  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
95  int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos);
96 
100 
101  for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
102  for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
103  //Both picture size and virtual boundaries are 8-aligned. For luma, we only need to check 8-aligned sizes.
104  if (!(w % 8) && !(h % 8)) {
105  if (check_func(c->alf.filter[LUMA], "vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
106  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
107  memset(dst0, 0, DST_BUF_SIZE);
108  memset(dst1, 0, DST_BUF_SIZE);
109  call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
110  call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
111  for (int i = 0; i < (h + 1); i++) {
112  if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, (w + 1) * SIZEOF_PIXEL))
113  fail();
114  }
115  // Bench only square sizes, and ones with dimensions being a power of two.
116  if (w == h && (w & (w - 1)) == 0)
117  bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
118  }
119  }
120  //For chroma, once it exceeds 64, it's not a 4:2:0 format, so we only need to check 8-aligned sizes as well.
121  if ((w <= 64 || !(w % 8)) && (h <= 64 || !(h % 8))) {
122  if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
123  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_CHROMA);
124  memset(dst0, 0, DST_BUF_SIZE);
125  memset(dst1, 0, DST_BUF_SIZE);
126  call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
127  call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
128  for (int i = 0; i < (h + 1); i++) {
129  if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, (w + 1) * SIZEOF_PIXEL))
130  fail();
131  }
132  if (w == h && (w & (w - 1)) == 0)
133  bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
134  }
135  }
136  }
137  }
138 }
139 
140 static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
141 {
142  LOCAL_ALIGNED_32(int, class_idx0, [SRC_BUF_SIZE]);
143  LOCAL_ALIGNED_32(int, transpose_idx0, [SRC_BUF_SIZE]);
144  LOCAL_ALIGNED_32(int, class_idx1, [SRC_BUF_SIZE]);
145  LOCAL_ALIGNED_32(int, transpose_idx1, [SRC_BUF_SIZE]);
146  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
147  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
149 
150  ptrdiff_t stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
151  int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
152 
153  declare_func(void, int *class_idx, int *transpose_idx,
154  const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp);
155 
157 
158  //Both picture size and virtual boundaries are 8-aligned. Classify is luma only, we only need to check 8-aligned sizes.
159  for (int h = 8; h <= MAX_CTU_SIZE; h += 8) {
160  for (int w = 8; w <= MAX_CTU_SIZE; w += 8) {
161  const int id_size = w * h / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * sizeof(int);
162  const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
163  if (check_func(c->alf.classify, "vvc_alf_classify_%dx%d_%d", w, h, bit_depth)) {
164  memset(class_idx0, 0, id_size);
165  memset(class_idx1, 0, id_size);
166  memset(transpose_idx0, 0, id_size);
167  memset(transpose_idx1, 0, id_size);
168  call_ref(class_idx0, transpose_idx0, src0 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
169 
170  call_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
171 
172  if (memcmp(class_idx0, class_idx1, id_size))
173  fail();
174  if (memcmp(transpose_idx0, transpose_idx1, id_size))
175  fail();
176  // Bench only square sizes, and ones with dimensions being a power of two.
177  if (w == h && (w & (w - 1)) == 0)
178  bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
179  }
180  }
181  }
182 }
183 
185 {
186  int bit_depth;
188  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
191  }
192  report("alf_filter");
193 
194  for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
197  }
198  report("alf_classify");
199 }
LUMA
#define LUMA
Definition: filter.c:31
mem_internal.h
ALF_VB_POS_ABOVE_LUMA
#define ALF_VB_POS_ABOVE_LUMA
Definition: ctu.h:81
src1
const pixel * src1
Definition: h264pred_template.c:421
data.h
w
uint8_t w
Definition: llviddspenc.c:38
ALF_GRADIENT_SIZE
#define ALF_GRADIENT_SIZE
Definition: ctu.h:86
check_func
#define check_func(func,...)
Definition: checkasm.h:180
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
call_ref
#define call_ref(...)
Definition: checkasm.h:195
DST_PIXEL_STRIDE
#define DST_PIXEL_STRIDE
Definition: vvc_alf.c:37
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
randomize_buffers2
#define randomize_buffers2(buf, size, filter)
Definition: vvc_alf.c:53
fail
#define fail()
Definition: checkasm.h:189
randomize_buffers
#define randomize_buffers(buf0, buf1, size)
Definition: vvc_alf.c:42
get_alf_vb_pos
static int get_alf_vb_pos(const int h, const int vb_pos_above)
Definition: vvc_alf.c:69
checkasm.h
check_alf_classify
static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
Definition: vvc_alf.c:140
rnd
#define rnd()
Definition: checkasm.h:173
dsp.h
check_alf_filter
static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
Definition: vvc_alf.c:77
clip
clip
Definition: af_crystalizer.c:122
intreadwrite.h
pixel_mask
static const uint32_t pixel_mask[3]
Definition: vvc_alf.c:33
call_new
#define call_new(...)
Definition: checkasm.h:298
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
SRC_BUF_SIZE
#define SRC_BUF_SIZE
Definition: vvc_alf.c:38
ALF_NUM_DIR
#define ALF_NUM_DIR
Definition: ctu.h:87
MAX_CTU_SIZE
#define MAX_CTU_SIZE
Definition: ctu.h:33
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
checkasm_check_vvc_alf
void checkasm_check_vvc_alf(void)
Definition: vvc_alf.c:184
ALF_BLOCK_SIZE
#define ALF_BLOCK_SIZE
Definition: ctu.h:76
LUMA_PARAMS_SIZE
#define LUMA_PARAMS_SIZE
Definition: vvc_alf.c:40
ff_vvc_dsp_init
void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
Definition: dsp.c:77
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:192
bench_new
#define bench_new(...)
Definition: checkasm.h:369
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
common.h
stride
#define stride
Definition: h264pred_template.c:537
CHROMA
@ CHROMA
Definition: vf_waveform.c:49
SRC_PIXEL_STRIDE
#define SRC_PIXEL_STRIDE
Definition: vvc_alf.c:36
src0
const pixel *const src0
Definition: h264pred_template.c:420
DST_BUF_SIZE
#define DST_BUF_SIZE
Definition: vvc_alf.c:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:184
int32_t
int32_t
Definition: audioconvert.c:56
h
h
Definition: vp9dsp_template.c:2070
ctu.h
ALF_VB_POS_ABOVE_CHROMA
#define ALF_VB_POS_ABOVE_CHROMA
Definition: ctu.h:82
width
#define width
Definition: dsp.h:85
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: vvc_alf.c:35
src
#define src
Definition: vp8dsp.c:248
VVCDSPContext
Definition: dsp.h:169