FFmpeg
hevc_pred.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2026 Jun Zhao <barryjzhao@tencent.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavcodec/hevc/pred.h"
24 #include "libavutil/intreadwrite.h"
25 #include "libavutil/mem_internal.h"
26 
/*
 * Masks applied to each random 32-bit word so that every sample stays within
 * the valid range for the tested bit depth. Indexed by bit_depth - 8:
 * all bits for 8-bit samples, 9 bits per 16-bit sample for 9-bit content and
 * 10 bits per 16-bit sample for 10-bit content.
 */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };

/* Bytes per sample. Expands in terms of a variable named bit_depth that must
 * be in scope at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Number of bytes of reference data randomized past index 0.
 * Increased to 4 * MAX_TB_SIZE to accommodate C code reads. */
#define PRED_SIZE 128
31 
/*
 * Fill the top and left reference arrays with random samples masked to the
 * current bit depth.
 *
 * Expects top, left and bit_depth to be in scope. Bytes [-4, PRED_SIZE) of
 * both arrays are written with aligned 32-bit stores, so both pointers must
 * be 4-byte aligned and have at least 4 addressable bytes in front of them.
 *
 * The 4 bytes in front of index 0 hold the corner sample(s) and receive the
 * same value in both arrays, so top[-1] == left[-1]. Everything from index 0
 * onwards is randomized independently: with identical data in both arrays an
 * implementation that reads the wrong reference array would go unnoticed.
 */
#define randomize_ref_buffers()                                        \
    do {                                                               \
        uint32_t mask = pixel_mask[bit_depth - 8];                     \
        for (int i = -4; i < PRED_SIZE; i += 4) {                      \
            uint32_t top_px  = rnd() & mask;                           \
            uint32_t left_px = i < 0 ? top_px : (rnd() & mask);        \
            AV_WN32A(top  + i, top_px);                                \
            AV_WN32A(left + i, left_px);                               \
        }                                                              \
    } while (0)
41 
43  uint8_t *top, uint8_t *left, int bit_depth)
44 {
45  const char *const block_name[] = { "4x4", "8x8", "16x16", "32x32" };
46  const int block_size[] = { 4, 8, 16, 32 };
47  int log2_size;
48 
49  PIXEL_RECT(buf0, 64, 64);
50  PIXEL_RECT(buf1, 64, 64);
51 
52  declare_func(void, uint8_t *src, const uint8_t *top,
53  const uint8_t *left, ptrdiff_t stride,
54  int log2_size, int c_idx);
55 
56  /* Test all 4 sizes: 4x4, 8x8, 16x16, 32x32 */
57  for (log2_size = 2; log2_size <= 5; log2_size++) {
58  int size = block_size[log2_size - 2];
59 
60  if (check_func(h->pred_dc, "hevc_pred_dc_%s_%d",
61  block_name[log2_size - 2], bit_depth)) {
62  /* Test with c_idx=0 (luma, with edge smoothing for size < 32) */
64  CLEAR_PIXEL_RECT(buf0);
65  CLEAR_PIXEL_RECT(buf1);
66  call_ref(buf0, top, left, buf0_stride, log2_size, 0);
67  call_new(buf1, top, left, buf1_stride, log2_size, 0);
68  checkasm_check_pixel_padded(buf0, buf0_stride,
69  buf1, buf1_stride,
70  size, size, "dst");
71 
72  /* Test with c_idx=1 (chroma, no edge smoothing) */
74  CLEAR_PIXEL_RECT(buf0);
75  CLEAR_PIXEL_RECT(buf1);
76  call_ref(buf0, top, left, buf0_stride, log2_size, 1);
77  call_new(buf1, top, left, buf1_stride, log2_size, 1);
78  checkasm_check_pixel_padded(buf0, buf0_stride,
79  buf1, buf1_stride,
80  size, size, "dst");
81 
82  bench_new(buf1, top, left, buf1_stride, log2_size, 0);
83  }
84  }
85 }
86 
88  uint8_t *top, uint8_t *left, int bit_depth)
89 {
90  const char *const block_name[] = { "4x4", "8x8", "16x16", "32x32" };
91  const int block_size[] = { 4, 8, 16, 32 };
92  int i;
93 
94  PIXEL_RECT(buf0, 64, 64);
95  PIXEL_RECT(buf1, 64, 64);
96 
97  declare_func(void, uint8_t *src, const uint8_t *top,
98  const uint8_t *left, ptrdiff_t stride);
99 
100  /* Test all 4 sizes: 4x4, 8x8, 16x16, 32x32 */
101  for (i = 0; i < 4; i++) {
102  int size = block_size[i];
103 
104  if (check_func(h->pred_planar[i], "hevc_pred_planar_%s_%d",
105  block_name[i], bit_depth)) {
107  CLEAR_PIXEL_RECT(buf0);
108  CLEAR_PIXEL_RECT(buf1);
109  call_ref(buf0, top, left, buf0_stride);
110  call_new(buf1, top, left, buf1_stride);
111  checkasm_check_pixel_padded(buf0, buf0_stride,
112  buf1, buf1_stride,
113  size, size, "dst");
114 
115  bench_new(buf1, top, left, buf1_stride);
116  }
117  }
118 }
119 
120 /*
121  * Angular prediction modes are divided into categories:
122  *
123  * Mode 10: Horizontal pure copy (H pure)
124  * Mode 26: Vertical pure copy (V pure)
125  * Modes 2-9: Horizontal positive angle (H pos) - uses left reference
126  * Modes 11-17: Horizontal negative angle (H neg) - needs reference extension
127  * Modes 18-25: Vertical negative angle (V neg) - needs reference extension
128  * Modes 27-34: Vertical positive angle (V pos) - uses top reference
129  *
130  * Each category has 4 NEON functions for 4x4, 8x8, 16x16, 32x32 sizes.
131  */
133  uint8_t *top, uint8_t *left, int bit_depth)
134 {
135  const char *const block_name[] = { "4x4", "8x8", "16x16", "32x32" };
136  const int block_size[] = { 4, 8, 16, 32 };
137  int i, mode;
138 
139  PIXEL_RECT(buf0, 64, 64);
140  PIXEL_RECT(buf1, 64, 64);
141 
142  declare_func(void, uint8_t *src, const uint8_t *top,
143  const uint8_t *left, ptrdiff_t stride, int c_idx, int mode);
144 
145  /* Test all 4 sizes */
146  for (i = 0; i < 4; i++) {
147  int size = block_size[i];
148 
149  /* Test all 33 angular modes (2-34) */
150  for (mode = 2; mode <= 34; mode++) {
151  const char *mode_category;
152 
153  /* Determine mode category for descriptive test name */
154  if (mode == 10)
155  mode_category = "Hpure";
156  else if (mode == 26)
157  mode_category = "Vpure";
158  else if (mode >= 2 && mode <= 9)
159  mode_category = "Hpos";
160  else if (mode >= 11 && mode <= 17)
161  mode_category = "Hneg";
162  else if (mode >= 18 && mode <= 25)
163  mode_category = "Vneg";
164  else /* mode >= 27 && mode <= 34 */
165  mode_category = "Vpos";
166 
167  if (check_func(h->pred_angular[i],
168  "hevc_pred_angular_%s_%s_mode%d_%d",
169  block_name[i], mode_category, mode, bit_depth)) {
170  /* Test with c_idx=0 (luma) */
172  CLEAR_PIXEL_RECT(buf0);
173  CLEAR_PIXEL_RECT(buf1);
174  call_ref(buf0, top, left, buf0_stride, 0, mode);
175  call_new(buf1, top, left, buf1_stride, 0, mode);
176  checkasm_check_pixel_padded(buf0, buf0_stride,
177  buf1, buf1_stride,
178  size, size, "dst");
179 
180  /* Test with c_idx=1 (chroma) for modes 10/26 to cover
181  * the edge filtering skip path */
182  if (mode == 10 || mode == 26) {
184  CLEAR_PIXEL_RECT(buf0);
185  CLEAR_PIXEL_RECT(buf1);
186  call_ref(buf0, top, left, buf0_stride, 1, mode);
187  call_new(buf1, top, left, buf1_stride, 1, mode);
188  checkasm_check_pixel_padded(buf0, buf0_stride,
189  buf1, buf1_stride,
190  size, size, "dst");
191  }
192 
193  bench_new(buf1, top, left, buf1_stride, 0, mode);
194  }
195  }
196  }
197 }
198 
200  uint8_t *top, uint8_t *left, int bit_depth)
201 {
202  const char *const block_name[] = { "8x8", "16x16", "32x32" };
203  const int block_size[] = { 8, 16, 32 };
204  int i;
205 
206  /* 3-tap filter: out[i] = (in[i+1] + 2*in[i] + in[i-1] + 2) >> 2
207  * Filters 2*size-1 samples (indices 0..2*size-2) plus corner [-1].
208  * Output: filtered_left[-1..2*size-1] and filtered_top[-1..2*size-1] */
209  declare_func(void, uint8_t *filtered_left, uint8_t *filtered_top,
210  const uint8_t *left, const uint8_t *top, int size);
211 
212  for (i = 0; i < 3; i++) {
213  int size = block_size[i];
214  int n = 2 * size;
215 
216  if (check_func(h->ref_filter_3tap[i],
217  "hevc_ref_filter_3tap_%s_%d",
218  block_name[i], bit_depth)) {
219  /* Allocate output buffers with space for [-1] indexing.
220  * Need n+1 elements: indices [-1..n-1] = n+1 pixels.
221  * Use (n+1)*SIZEOF_PIXEL bytes starting at offset SIZEOF_PIXEL. */
222  LOCAL_ALIGNED_32(uint8_t, fl_ref_buf, [PRED_SIZE + 16]);
223  LOCAL_ALIGNED_32(uint8_t, fl_new_buf, [PRED_SIZE + 16]);
224  LOCAL_ALIGNED_32(uint8_t, ft_ref_buf, [PRED_SIZE + 16]);
225  LOCAL_ALIGNED_32(uint8_t, ft_new_buf, [PRED_SIZE + 16]);
226  uint8_t *fl_ref = fl_ref_buf + 8;
227  uint8_t *fl_new = fl_new_buf + 8;
228  uint8_t *ft_ref = ft_ref_buf + 8;
229  uint8_t *ft_new = ft_new_buf + 8;
230 
232  /* Clear output buffers so comparison is clean */
233  memset(fl_ref_buf, 0, PRED_SIZE + 16);
234  memset(fl_new_buf, 0, PRED_SIZE + 16);
235  memset(ft_ref_buf, 0, PRED_SIZE + 16);
236  memset(ft_new_buf, 0, PRED_SIZE + 16);
237 
238  call_ref(fl_ref, ft_ref, left, top, size);
239  call_new(fl_new, ft_new, left, top, size);
240 
241  /* Compare filtered_left[-1..2*size-1] and filtered_top[-1..2*size-1] */
242  if (memcmp(fl_ref - SIZEOF_PIXEL, fl_new - SIZEOF_PIXEL,
243  (n + 1) * SIZEOF_PIXEL))
244  fail();
245  if (memcmp(ft_ref - SIZEOF_PIXEL, ft_new - SIZEOF_PIXEL,
246  (n + 1) * SIZEOF_PIXEL))
247  fail();
248 
249  bench_new(fl_new, ft_new, left, top, size);
250  }
251  }
252 }
253 
255  uint8_t *top, uint8_t *left,
256  int bit_depth)
257 {
258  /* Strong intra smoothing: only 32x32 luma.
259  * Interpolates top into filtered_top[0..62], sets filtered_top[-1] and [63].
260  * Modifies left[0..62] in-place. */
261  declare_func(void, uint8_t *filtered_top, uint8_t *left,
262  const uint8_t *top);
263 
264  if (check_func(h->ref_filter_strong,
265  "hevc_ref_filter_strong_%d", bit_depth)) {
266  LOCAL_ALIGNED_32(uint8_t, ft_ref_buf, [PRED_SIZE + 16]);
267  LOCAL_ALIGNED_32(uint8_t, ft_new_buf, [PRED_SIZE + 16]);
268  LOCAL_ALIGNED_32(uint8_t, left_ref_buf, [PRED_SIZE + 16]);
269  LOCAL_ALIGNED_32(uint8_t, left_new_buf, [PRED_SIZE + 16]);
270  uint8_t *ft_ref = ft_ref_buf + 8;
271  uint8_t *ft_new = ft_new_buf + 8;
272  uint8_t *left_ref = left_ref_buf + 8;
273  uint8_t *left_new = left_new_buf + 8;
274 
276  memset(ft_ref_buf, 0, PRED_SIZE + 16);
277  memset(ft_new_buf, 0, PRED_SIZE + 16);
278 
279  /* Copy left so both ref and new start with the same input
280  * (left is modified in-place) */
281  memcpy(left_ref_buf, left - 8, PRED_SIZE + 16);
282  memcpy(left_new_buf, left - 8, PRED_SIZE + 16);
283 
284  call_ref(ft_ref, left_ref, top);
285  call_new(ft_new, left_new, top);
286 
287  /* Compare filtered_top[-1..63] = 65 pixels */
288  if (memcmp(ft_ref - SIZEOF_PIXEL, ft_new - SIZEOF_PIXEL,
289  65 * SIZEOF_PIXEL))
290  fail();
291 
292  /* Compare left[-1..63] = 65 pixels (left[-1] is unchanged,
293  * left[0..62] are modified, left[63] is unchanged) */
294  if (memcmp(left_ref - SIZEOF_PIXEL, left_new - SIZEOF_PIXEL,
295  65 * SIZEOF_PIXEL))
296  fail();
297 
298  bench_new(ft_new, left_new, top);
299  }
300 }
301 
303 {
304  LOCAL_ALIGNED_32(uint8_t, top_buf, [PRED_SIZE + 16]);
305  LOCAL_ALIGNED_32(uint8_t, left_buf, [PRED_SIZE + 16]);
306  /* Add offset of 8 bytes to allow negative indexing (top[-1], left[-1]) */
307  uint8_t *top = top_buf + 8;
308  uint8_t *left = left_buf + 8;
309  int bit_depth;
310 
311  for (bit_depth = 8; bit_depth <= 10; bit_depth += 2) {
313 
315  check_pred_dc(&h, top, left, bit_depth);
316  }
317  report("pred_dc");
318 
319  for (bit_depth = 8; bit_depth <= 10; bit_depth += 2) {
321 
324  }
325  report("pred_planar");
326 
327  for (bit_depth = 8; bit_depth <= 10; bit_depth += 2) {
329 
332  }
333  report("pred_angular");
334 
335  for (bit_depth = 8; bit_depth <= 10; bit_depth += 2) {
337 
340  }
341  report("ref_filter_3tap");
342 
343  for (bit_depth = 8; bit_depth <= 10; bit_depth += 2) {
345 
348  }
349  report("ref_filter_strong");
350 }
check_pred_angular
static void check_pred_angular(HEVCPredContext *h, uint8_t *top, uint8_t *left, int bit_depth)
Definition: hevc_pred.c:132
mem_internal.h
PRED_SIZE
#define PRED_SIZE
Definition: hevc_pred.c:30
mode
Definition: swscale.c:71
check_func
#define check_func
Definition: test.h:480
checkasm_check_pixel_padded
#define checkasm_check_pixel_padded(...)
Definition: checkasm.h:171
bench_new
#define bench_new
Definition: test.h:486
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
call_ref
#define call_ref
Definition: test.h:484
randomize_ref_buffers
#define randomize_ref_buffers()
Definition: hevc_pred.c:32
checkasm.h
ff_hevc_pred_init
void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
Definition: pred.c:43
intreadwrite.h
declare_func
#define declare_func
Definition: test.h:488
HEVCPredContext
Definition: pred.h:32
fail
#define fail
Definition: test.h:478
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
CLEAR_PIXEL_RECT
#define CLEAR_PIXEL_RECT(name)
Definition: checkasm.h:156
pixel_mask
static const uint32_t pixel_mask[3]
Definition: hevc_pred.c:27
check_pred_planar
static void check_pred_planar(HEVCPredContext *h, uint8_t *top, uint8_t *left, int bit_depth)
Definition: hevc_pred.c:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
size
int size
Definition: twinvq_data.h:10344
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: hevc_pred.c:29
pred.h
call_new
#define call_new
Definition: test.h:485
check_pred_dc
static void check_pred_dc(HEVCPredContext *h, uint8_t *top, uint8_t *left, int bit_depth)
Definition: hevc_pred.c:42
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
mode
mode
Definition: ebur128.h:83
check_ref_filter_strong
static void check_ref_filter_strong(HEVCPredContext *h, uint8_t *top, uint8_t *left, int bit_depth)
Definition: hevc_pred.c:254
PIXEL_RECT
#define PIXEL_RECT(name, w, h)
Definition: checkasm.h:149
report
#define report
Definition: test.h:479
h
h
Definition: vp9dsp_template.c:2070
stride
#define stride
Definition: h264pred_template.c:536
checkasm_check_hevc_pred
void checkasm_check_hevc_pred(void)
Definition: hevc_pred.c:302
src
#define src
Definition: vp8dsp.c:248
check_ref_filter_3tap
static void check_ref_filter_3tap(HEVCPredContext *h, uint8_t *top, uint8_t *left, int bit_depth)
Definition: hevc_pred.c:199