FFmpeg
h264dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/h264dsp.h"
25 #include "libavcodec/h264data.h"
26 #include "libavutil/common.h"
27 #include "libavutil/internal.h"
28 #include "libavutil/intreadwrite.h"
29 
/* Masks applied to 32-bit random words when filling pixel buffers.
 * Indexed by bit_depth - 8, so the entries cover 8, 9 and 10 bit samples. */
static const uint32_t pixel_mask[3] = {
    0xffffffff,
    0x01ff01ff,
    0x03ff03ff,
};

/* Same indexing as pixel_mask, but used for the loop filter input buffers;
 * these masks additionally restrict part of each word to its low 4 bits. */
static const uint32_t pixel_mask_lf[3] = {
    0xff0fff0f,
    0x01ff000f,
    0x03ff000f,
};

/* All three macros below rely on a variable named bit_depth (where used)
 * being in scope at the point of expansion. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)  /* bytes per pixel sample       */
#define SIZEOF_COEF  (2 * SIZEOF_PIXEL)     /* bytes per transform coeff    */
#define PIXEL_STRIDE 16                     /* bytes per row in src/dst     */
36 
/*
 * Fill the first sz rows (PIXEL_STRIDE bytes each) of src and dst with
 * masked random samples, then store the per-pixel difference src - dst of
 * the top-left sz x sz area into coef (row-major, sz entries per row).
 *
 * Relies on these names from the invoking scope: src, dst, coef, sz and
 * bit_depth. For bit_depth 8 the samples are bytes and coef is written as
 * given; otherwise the samples are read as uint16_t and coef is written
 * through an int32_t pointer.
 */
#define randomize_buffers()                                                   \
    do {                                                                      \
        const uint32_t rb_mask = pixel_mask[bit_depth - 8];                   \
        int rb_row, rb_col;                                                   \
        for (rb_row = 0; rb_row < sz; rb_row++) {                             \
            /* One aligned 32-bit store per 4 bytes; src is drawn first. */   \
            for (rb_col = 0; rb_col < PIXEL_STRIDE; rb_col += 4) {            \
                AV_WN32A(src + rb_row * PIXEL_STRIDE + rb_col,                \
                         rnd() & rb_mask);                                    \
                AV_WN32A(dst + rb_row * PIXEL_STRIDE + rb_col,                \
                         rnd() & rb_mask);                                    \
            }                                                                 \
            if (bit_depth == 8) {                                             \
                for (rb_col = 0; rb_col < sz; rb_col++)                       \
                    coef[rb_row * sz + rb_col] =                              \
                        src[rb_row * PIXEL_STRIDE + rb_col] -                 \
                        dst[rb_row * PIXEL_STRIDE + rb_col];                  \
            } else {                                                          \
                for (rb_col = 0; rb_col < sz; rb_col++)                       \
                    ((int32_t *)coef)[rb_row * sz + rb_col] =                 \
                        ((uint16_t *)src)[rb_row * (PIXEL_STRIDE/2) + rb_col] - \
                        ((uint16_t *)dst)[rb_row * (PIXEL_STRIDE/2) + rb_col]; \
            }                                                                 \
        }                                                                     \
    } while (0)
58 
/*
 * Define dct4x4_<size>(): an in-place 4x4 butterfly transform followed by a
 * per-position scaling step, operating on 16 coefficients of type dctcoef.
 * The tests use it to turn random pixel differences into coefficient blocks
 * that are then handed to the IDCT functions under test.
 *
 * The first pass reads row i of coef and writes column i of tmp; the second
 * pass reads row i of tmp and writes row i of coef.
 *
 * The scaling product is computed in 64 bits: with 9/10-bit input the
 * transformed value can reach 36 * 1023, and multiplying that by the scale
 * factor does not fit in a 32-bit int (signed overflow). This also matches
 * what dct8x8_impl does.
 */
#define dct4x4_impl(size, dctcoef)                                            \
static void dct4x4_##size(dctcoef *coef)                                      \
{                                                                             \
    int i, y, x;                                                              \
    dctcoef tmp[16];                                                          \
    for (i = 0; i < 4; i++) {                                                 \
        const int z0 = coef[i*4 + 0] + coef[i*4 + 3];                         \
        const int z1 = coef[i*4 + 1] + coef[i*4 + 2];                         \
        const int z2 = coef[i*4 + 0] - coef[i*4 + 3];                         \
        const int z3 = coef[i*4 + 1] - coef[i*4 + 2];                         \
        tmp[i + 4*0] =   z0 +   z1;                                           \
        tmp[i + 4*1] = 2*z2 +   z3;                                           \
        tmp[i + 4*2] =   z0 -   z1;                                           \
        tmp[i + 4*3] =   z2 - 2*z3;                                           \
    }                                                                         \
    for (i = 0; i < 4; i++) {                                                 \
        const int z0 = tmp[i*4 + 0] + tmp[i*4 + 3];                           \
        const int z1 = tmp[i*4 + 1] + tmp[i*4 + 2];                           \
        const int z2 = tmp[i*4 + 0] - tmp[i*4 + 3];                           \
        const int z3 = tmp[i*4 + 1] - tmp[i*4 + 2];                           \
        coef[i*4 + 0] =   z0 +   z1;                                          \
        coef[i*4 + 1] = 2*z2 +   z3;                                          \
        coef[i*4 + 2] =   z0 -   z1;                                          \
        coef[i*4 + 3] =   z2 - 2*z3;                                          \
    }                                                                         \
    for (y = 0; y < 4; y++) {                                                 \
        for (x = 0; x < 4; x++) {                                             \
            static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 }; \
            /* 0: both indices even, 1: one odd, 2: both odd */               \
            const int idx = (y & 1) + (x & 1);                                \
            coef[y*4 + x] = ((int64_t)coef[y*4 + x] * scale[idx] +            \
                             (1 << 14)) >> 15;                                \
        }                                                                     \
    }                                                                         \
}

/*
 * One 8-point butterfly pass: reads 8 values from src spaced srcstride
 * elements apart and writes 8 results to dst spaced dststride apart.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                          \
    const int a0 = (src)[srcstride * 0] + (src)[srcstride * 7];               \
    const int a1 = (src)[srcstride * 0] - (src)[srcstride * 7];               \
    const int a2 = (src)[srcstride * 1] + (src)[srcstride * 6];               \
    const int a3 = (src)[srcstride * 1] - (src)[srcstride * 6];               \
    const int a4 = (src)[srcstride * 2] + (src)[srcstride * 5];               \
    const int a5 = (src)[srcstride * 2] - (src)[srcstride * 5];               \
    const int a6 = (src)[srcstride * 3] + (src)[srcstride * 4];               \
    const int a7 = (src)[srcstride * 3] - (src)[srcstride * 4];               \
    const int b0 = a0 + a6;                                                   \
    const int b1 = a2 + a4;                                                   \
    const int b2 = a0 - a6;                                                   \
    const int b3 = a2 - a4;                                                   \
    const int b4 = a3 + a5 + (a1 + (a1 >> 1));                                \
    const int b5 = a1 - a7 - (a5 + (a5 >> 1));                                \
    const int b6 = a1 + a7 - (a3 + (a3 >> 1));                                \
    const int b7 = a3 - a5 + (a7 + (a7 >> 1));                                \
    (dst)[dststride * 0] =  b0 + b1;                                          \
    (dst)[dststride * 1] =  b4 + (b7 >> 2);                                   \
    (dst)[dststride * 2] =  b2 + (b3 >> 1);                                   \
    (dst)[dststride * 3] =  b5 + (b6 >> 2);                                   \
    (dst)[dststride * 4] =  b0 - b1;                                          \
    (dst)[dststride * 5] =  b6 - (b5 >> 2);                                   \
    (dst)[dststride * 6] = (b2 >> 1) - b3;                                    \
    (dst)[dststride * 7] = (b4 >> 2) - b7;                                    \
} while (0)

/*
 * Define dct8x8_<size>(): in-place 8x8 transform built from two DCT8_1D
 * passes (columns of coef into tmp, then rows of tmp into columns of coef),
 * followed by a per-position scaling step done in 64-bit arithmetic.
 */
#define dct8x8_impl(size, dctcoef)                                            \
static void dct8x8_##size(dctcoef *coef)                                      \
{                                                                             \
    int i, x, y;                                                              \
    dctcoef tmp[64];                                                          \
    for (i = 0; i < 8; i++)                                                   \
        DCT8_1D(coef + i, 8, tmp + i, 8);                                     \
                                                                              \
    for (i = 0; i < 8; i++)                                                   \
        DCT8_1D(tmp + 8*i, 1, coef + i, 8);                                   \
                                                                              \
    for (y = 0; y < 8; y++) {                                                 \
        for (x = 0; x < 8; x++) {                                             \
            static const int scale[] = {                                      \
                13107 * 20, 11428 * 18, 20972 * 32,                           \
                12222 * 19, 16777 * 25, 15481 * 24,                           \
            };                                                                \
            /* scale[] entry for each position, repeating every 4x4 */        \
            static const int idxmap[] = {                                     \
                0, 3, 4, 3,                                                   \
                3, 1, 5, 1,                                                   \
                4, 5, 2, 5,                                                   \
                3, 1, 5, 1,                                                   \
            };                                                                \
            const int idx = idxmap[(y & 3) * 4 + (x & 3)];                    \
            coef[y*8 + x] = ((int64_t)coef[y*8 + x] *                         \
                             scale[idx] + (1 << 17)) >> 18;                   \
        }                                                                     \
    }                                                                         \
}

/* 16-bit coefficient variants are used for 8-bit content, 32-bit otherwise. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)

dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)
155 
/*
 * Run the 4x4 reference transform in place on coef.
 * For bit_depth 8 the buffer holds int16_t coefficients; for any other
 * depth it is reinterpreted as int32_t coefficients.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct4x4_32((int32_t *) coef);
        return;
    }
    dct4x4_16(coef);
}
163 
/*
 * Run the 8x8 reference transform in place on coef.
 * For bit_depth 8 the buffer holds int16_t coefficients; for any other
 * depth it is reinterpreted as int32_t coefficients.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct8x8_32((int32_t *) coef);
        return;
    }
    dct8x8_16(coef);
}
172 
173 
174 static void check_idct(void)
175 {
176  LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
177  LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
178  LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
179  LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
180  LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
181  LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
182  LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
184  int bit_depth, sz, align, dc;
185  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
186 
187  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
188  ff_h264dsp_init(&h, bit_depth, 1);
189  for (sz = 4; sz <= 8; sz += 4) {
191 
192  if (sz == 4)
193  dct4x4(coef, bit_depth);
194  else
195  dct8x8(coef, bit_depth);
196 
197  for (dc = 0; dc <= 1; dc++) {
198  void (*idct)(uint8_t *, int16_t *, int) = NULL;
199  switch ((sz << 1) | dc) {
200  case (4 << 1) | 0: idct = h.h264_idct_add; break;
201  case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
202  case (8 << 1) | 0: idct = h.h264_idct8_add; break;
203  case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
204  }
205  if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
206  for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
207  uint8_t *dst1 = dst1_base + align;
208  if (dc) {
209  memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
210  memcpy(subcoef0, coef, SIZEOF_COEF);
211  } else {
212  memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
213  }
214  memcpy(dst0, dst, sz * PIXEL_STRIDE);
215  memcpy(dst1, dst, sz * PIXEL_STRIDE);
216  memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
217  call_ref(dst0, subcoef0, PIXEL_STRIDE);
218  call_new(dst1, subcoef1, PIXEL_STRIDE);
219  if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
220  memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
221  fail();
222  bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
223  }
224  }
225  }
226  }
227  }
228 }
229 
230 static void check_idct_multiple(void)
231 {
232  LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
233  LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
234  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
235  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
236  LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
237  LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
238  LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
240  int bit_depth, i, y, func;
241  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
242 
243  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
244  ff_h264dsp_init(&h, bit_depth, 1);
245  for (func = 0; func < 3; func++) {
246  void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
247  const char *name;
248  int sz = 4, intra = 0;
249  int block_offset[16] = { 0 };
250  switch (func) {
251  case 0:
252  idct = h.h264_idct_add16;
253  name = "h264_idct_add16";
254  break;
255  case 1:
257  name = "h264_idct_add16intra";
258  intra = 1;
259  break;
260  case 2:
261  idct = h.h264_idct8_add4;
262  name = "h264_idct8_add4";
263  sz = 8;
264  break;
265  }
266  memset(nnzc, 0, 15 * 8);
267  memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
268  for (i = 0; i < 16 * 16; i += sz * sz) {
269  uint8_t src[8 * 8 * 2];
270  uint8_t dst[8 * 8 * 2];
271  int16_t coef[8 * 8 * 2];
272  int index = i / sz;
273  int block_y = (index / 16) * sz;
274  int block_x = index % 16;
275  int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
276  int nnz = rnd() % 3;
277 
279  if (sz == 4)
280  dct4x4(coef, bit_depth);
281  else
282  dct8x8(coef, bit_depth);
283 
284  for (y = 0; y < sz; y++)
285  memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
286  &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
287 
288  if (nnz > 1)
289  nnz = sz * sz;
290  memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
291  coef, nnz * SIZEOF_COEF);
292 
293  if (intra && nnz == 1)
294  nnz = 0;
295 
296  nnzc[scan8[i / 16]] = nnz;
297  block_offset[i / 16] = offset;
298  }
299 
300  if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
301  memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
302  memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
303  memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
304  memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
305  call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
306  call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
307  if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
308  memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
309  fail();
310  bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
311  }
312  }
313  }
314 }
315 
316 
317 static void check_loop_filter(void)
318 {
319  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
320  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
321  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
323  int bit_depth;
324  int alphas[36], betas[36];
325  int8_t tc0[36][4];
326 
327  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
328  int alpha, int beta, int8_t *tc0);
329 
330  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
331  int i, j, a, c;
332  uint32_t mask = pixel_mask_lf[bit_depth - 8];
333  ff_h264dsp_init(&h, bit_depth, 1);
334  for (i = 35, a = 255, c = 250; i >= 0; i--) {
335  alphas[i] = a << (bit_depth - 8);
336  betas[i] = (i + 1) / 2 << (bit_depth - 8);
337  tc0[i][0] = tc0[i][3] = (c + 6) / 10;
338  tc0[i][1] = (c + 7) / 15;
339  tc0[i][2] = (c + 9) / 20;
340  a = a*9/10;
341  c = c*9/10;
342  }
343 
344 #define CHECK_LOOP_FILTER(name, align, idc) \
345  do { \
346  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
347  for (j = 0; j < 36; j++) { \
348  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
349  for (i = 0; i < 1024; i+=4) { \
350  AV_WN32A(dst + i, rnd() & mask); \
351  } \
352  memcpy(dst0, dst, 32 * 16 * 2); \
353  memcpy(dst1, dst, 32 * 16 * 2); \
354  \
355  call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
356  call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
357  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
358  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
359  "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
360  tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
361  fail(); \
362  } \
363  bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \
364  } \
365  } \
366  } while (0)
367 
374 
375  ff_h264dsp_init(&h, bit_depth, 2);
378 #undef CHECK_LOOP_FILTER
379  }
380 }
381 
382 static void check_loop_filter_intra(void)
383 {
384  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
385  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
386  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
388  int bit_depth;
389  int alphas[36], betas[36];
390 
391  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
392  int alpha, int beta);
393 
394  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
395  int i, j, a;
396  uint32_t mask = pixel_mask_lf[bit_depth - 8];
397  ff_h264dsp_init(&h, bit_depth, 1);
398  for (i = 35, a = 255; i >= 0; i--) {
399  alphas[i] = a << (bit_depth - 8);
400  betas[i] = (i + 1) / 2 << (bit_depth - 8);
401  a = a*9/10;
402  }
403 
404 #define CHECK_LOOP_FILTER(name, align, idc) \
405  do { \
406  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
407  for (j = 0; j < 36; j++) { \
408  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
409  for (i = 0; i < 1024; i+=4) { \
410  AV_WN32A(dst + i, rnd() & mask); \
411  } \
412  memcpy(dst0, dst, 32 * 16 * 2); \
413  memcpy(dst1, dst, 32 * 16 * 2); \
414  \
415  call_ref(dst0 + off, 32, alphas[j], betas[j]); \
416  call_new(dst1 + off, 32, alphas[j], betas[j]); \
417  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
418  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
419  j, alphas[j], betas[j]); \
420  fail(); \
421  } \
422  bench_new(dst1, 32, alphas[j], betas[j]); \
423  } \
424  } \
425  } while (0)
426 
433 
434  ff_h264dsp_init(&h, bit_depth, 2);
437 #undef CHECK_LOOP_FILTER
438  }
439 }
440 
/*
 * Entry point of the H.264 DSP checks: runs every check group in this file
 * and emits one report per group ("idct", "loop_filter",
 * "loop_filter_intra").
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}
#define NULL
Definition: coverity.c:32
static void idct(int16_t block[64])
Definition: 4xm.c:163
static void check_idct_multiple(void)
Definition: h264dsp.c:230
static void check_loop_filter_intra(void)
Definition: h264dsp.c:382
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
static void check_idct(void)
Definition: h264dsp.c:174
#define src
Definition: vp8dsp.c:254
static void FUNCC() h264_h_loop_filter_chroma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
#define dct8x8_impl(size, dctcoef)
Definition: h264dsp.c:120
static const uint32_t pixel_mask[3]
Definition: h264dsp.c:30
H.264 DSP functions.
#define report
Definition: checkasm.h:125
static void check_loop_filter(void)
Definition: h264dsp.c:317
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
static void FUNCC() h264_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:99
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:81
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:87
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
static void FUNCC() h264_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
static void dct8x8(int16_t *coef, int bit_depth)
Definition: h264dsp.c:164
static void FUNCC() h264_h_loop_filter_luma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
static const uint16_t mask[17]
Definition: lzw.c:38
void checkasm_check_h264dsp(void)
Definition: h264dsp.c:441
#define fail()
Definition: checkasm.h:122
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:90
common internal API header
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
static void FUNCC() h264_h_loop_filter_luma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:226
int32_t
#define CHECK_LOOP_FILTER(name, align, idc)
static void FUNCC() h264_h_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:119
static void FUNCC() h264_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:83
#define call_ref(...)
Definition: checkasm.h:128
const AVS_VideoInfo int align
Definition: avisynth_c.h:887
static void FUNCC() h264_h_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:45
Libavcodec external API header.
#define PIXEL_STRIDE
Definition: h264dsp.c:35
static const int16_t alpha[]
Definition: ilbcdata.h:55
#define SIZEOF_PIXEL
Definition: h264dsp.c:33
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:644
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:93
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
int index
Definition: gxfenc.c:89
#define check_func(func,...)
Definition: checkasm.h:113
#define SIZEOF_COEF
Definition: h264dsp.c:34
static void FUNCC() h264_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
int
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
common internal and external API header
#define rnd()
Definition: checkasm.h:106
static const uint32_t pixel_mask_lf[3]
Definition: h264dsp.c:31
#define dct4x4_impl(size, dctcoef)
Definition: h264dsp.c:59
static void FUNCC() h264_v_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define bench_new(...)
Definition: checkasm.h:255
static void FUNCC() h264_v_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp.c:67
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:85
#define LOCAL_ALIGNED_16(t, v,...)
Definition: internal.h:131
static void FUNCC() h264_h_loop_filter_chroma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define call_new(...)
Definition: checkasm.h:195
#define randomize_buffers()
Definition: h264dsp.c:37
const char * name
Definition: opengl_enc.c:102