FFmpeg
h264dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Martin Storsjo
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 #include <string.h>
22 #include "checkasm.h"
23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/h264dsp.h"
25 #include "libavcodec/h264data.h"
26 #include "libavutil/common.h"
27 #include "libavutil/internal.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/mem_internal.h"
30 
/*
 * Masks applied to 32 random bits when filling pixel buffers, indexed by
 * bit_depth - 8 (entries for 8, 9 and 10 bits per sample).
 */
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
/* Same indexing as pixel_mask; used by the loop filter checks. */
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };

/*
 * Size in bytes of one pixel / one coefficient. Both read the variable
 * `bit_depth` from the scope in which they are expanded.
 */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
/* Row pitch, in bytes, of the small src/dst scratch blocks. */
#define PIXEL_STRIDE 16
37 
/*
 * Fill the first `sz` rows of `src` and `dst` with random, bit-depth-masked
 * samples and store the per-sample difference src - dst in `coef`.
 *
 * Expands in a scope that must provide: src, dst (byte buffers with
 * PIXEL_STRIDE bytes per row), coef (int16_t buffer), sz (block size) and
 * bit_depth. For bit_depth == 8 the samples are bytes and the residual is
 * stored as 16-bit values; otherwise the samples are read as uint16_t and
 * the residual is stored as 32-bit values through a cast of coef.
 */
#define randomize_buffers() \
    do { \
        int x, y; \
        uint32_t mask = pixel_mask[bit_depth - 8]; \
        for (y = 0; y < sz; y++) { \
            /* whole row, four bytes at a time */ \
            for (x = 0; x < PIXEL_STRIDE; x += 4) { \
                AV_WN32A(src + y * PIXEL_STRIDE + x, rnd() & mask); \
                AV_WN32A(dst + y * PIXEL_STRIDE + x, rnd() & mask); \
            } \
            /* residual for the sz samples that belong to the block */ \
            for (x = 0; x < sz; x++) { \
                if (bit_depth == 8) { \
                    coef[y * sz + x] = src[y * PIXEL_STRIDE + x] - \
                                       dst[y * PIXEL_STRIDE + x]; \
                } else { \
                    ((int32_t *)coef)[y * sz + x] = \
                        ((uint16_t *)src)[y * (PIXEL_STRIDE/2) + x] - \
                        ((uint16_t *)dst)[y * (PIXEL_STRIDE/2) + x]; \
                } \
            } \
        } \
    } while (0)
59 
/*
 * Generate dct4x4_<size>(): an in-place forward 4x4 transform of `coef`
 * (16 values of type dctcoef) followed by a per-position scaling.
 *
 * The first pass transforms each group of four consecutive inputs and
 * stores the result transposed in tmp; the second pass does the same on
 * tmp and writes back to coef. The scale factor is selected by the parity
 * of x and y and the product is rounded and shifted right by 15.
 *
 * The scaling multiply is done in 64 bits: with 32-bit coefficients a
 * transformed value such as 24552 times 8066 * 13 no longer fits in int.
 */
#define dct4x4_impl(size, dctcoef) \
static void dct4x4_##size(dctcoef *coef) \
{ \
    int i, y, x; \
    dctcoef tmp[16]; \
    for (i = 0; i < 4; i++) { \
        const int z0 = coef[i*4 + 0] + coef[i*4 + 3]; \
        const int z1 = coef[i*4 + 1] + coef[i*4 + 2]; \
        const int z2 = coef[i*4 + 0] - coef[i*4 + 3]; \
        const int z3 = coef[i*4 + 1] - coef[i*4 + 2]; \
        tmp[i + 4*0] = z0 + z1; \
        tmp[i + 4*1] = 2*z2 + z3; \
        tmp[i + 4*2] = z0 - z1; \
        tmp[i + 4*3] = z2 - 2*z3; \
    } \
    for (i = 0; i < 4; i++) { \
        const int z0 = tmp[i*4 + 0] + tmp[i*4 + 3]; \
        const int z1 = tmp[i*4 + 1] + tmp[i*4 + 2]; \
        const int z2 = tmp[i*4 + 0] - tmp[i*4 + 3]; \
        const int z3 = tmp[i*4 + 1] - tmp[i*4 + 2]; \
        coef[i*4 + 0] = z0 + z1; \
        coef[i*4 + 1] = 2*z2 + z3; \
        coef[i*4 + 2] = z0 - z1; \
        coef[i*4 + 3] = z2 - 2*z3; \
    } \
    for (y = 0; y < 4; y++) { \
        for (x = 0; x < 4; x++) { \
            static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 }; \
            const int idx = (y & 1) + (x & 1); \
            coef[y*4 + x] = ((int64_t)coef[y*4 + x] * \
                             scale[idx] + (1 << 14)) >> 15; \
        } \
    } \
}

/*
 * One 8-point forward transform pass: reads eight values from `src` spaced
 * `srcstride` elements apart and writes eight results to `dst` spaced
 * `dststride` elements apart. Intermediate values are computed as int.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do { \
    const int a0 = (src)[(srcstride) * 0] + (src)[(srcstride) * 7]; \
    const int a1 = (src)[(srcstride) * 0] - (src)[(srcstride) * 7]; \
    const int a2 = (src)[(srcstride) * 1] + (src)[(srcstride) * 6]; \
    const int a3 = (src)[(srcstride) * 1] - (src)[(srcstride) * 6]; \
    const int a4 = (src)[(srcstride) * 2] + (src)[(srcstride) * 5]; \
    const int a5 = (src)[(srcstride) * 2] - (src)[(srcstride) * 5]; \
    const int a6 = (src)[(srcstride) * 3] + (src)[(srcstride) * 4]; \
    const int a7 = (src)[(srcstride) * 3] - (src)[(srcstride) * 4]; \
    const int b0 = a0 + a6; \
    const int b1 = a2 + a4; \
    const int b2 = a0 - a6; \
    const int b3 = a2 - a4; \
    const int b4 = a3 + a5 + (a1 + (a1 >> 1)); \
    const int b5 = a1 - a7 - (a5 + (a5 >> 1)); \
    const int b6 = a1 + a7 - (a3 + (a3 >> 1)); \
    const int b7 = a3 - a5 + (a7 + (a7 >> 1)); \
    (dst)[(dststride) * 0] = b0 + b1; \
    (dst)[(dststride) * 1] = b4 + (b7 >> 2); \
    (dst)[(dststride) * 2] = b2 + (b3 >> 1); \
    (dst)[(dststride) * 3] = b5 + (b6 >> 2); \
    (dst)[(dststride) * 4] = b0 - b1; \
    (dst)[(dststride) * 5] = b6 - (b5 >> 2); \
    (dst)[(dststride) * 6] = (b2 >> 1) - b3; \
    (dst)[(dststride) * 7] = (b4 >> 2) - b7; \
} while (0)

/*
 * Generate dct8x8_<size>(): an in-place forward 8x8 transform of `coef`
 * (64 values of type dctcoef) followed by a per-position scaling.
 *
 * The first pass runs DCT8_1D down each column of coef into the same
 * column of tmp; the second pass runs it along each row of tmp and writes
 * the result into the matching column of coef. The scale factor is looked
 * up through idxmap from (y & 3, x & 3); the product is formed in 64 bits,
 * rounded and shifted right by 18.
 */
#define dct8x8_impl(size, dctcoef) \
static void dct8x8_##size(dctcoef *coef) \
{ \
    int i, x, y; \
    dctcoef tmp[64]; \
    for (i = 0; i < 8; i++) \
        DCT8_1D(coef + i, 8, tmp + i, 8); \
    \
    for (i = 0; i < 8; i++) \
        DCT8_1D(tmp + 8*i, 1, coef + i, 8); \
    \
    for (y = 0; y < 8; y++) { \
        for (x = 0; x < 8; x++) { \
            static const int scale[] = { \
                13107 * 20, 11428 * 18, 20972 * 32, \
                12222 * 19, 16777 * 25, 15481 * 24, \
            }; \
            static const int idxmap[] = { \
                0, 3, 4, 3, \
                3, 1, 5, 1, \
                4, 5, 2, 5, \
                3, 1, 5, 1, \
            }; \
            const int idx = idxmap[(y & 3) * 4 + (x & 3)]; \
            coef[y*8 + x] = ((int64_t)coef[y*8 + x] * \
                             scale[idx] + (1 << 17)) >> 18; \
        } \
    } \
}

/* 16-bit and 32-bit coefficient variants of both transforms. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)

dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)

/*
 * Forward 4x4 transform of `coef` in place. For bit_depth == 8 the buffer
 * holds 16 int16_t values; for any other depth it is reinterpreted as 16
 * int32_t values.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8)
        dct4x4_16(coef);
    else
        dct4x4_32((int32_t *) coef);
}

/*
 * Forward 8x8 transform of `coef` in place. For bit_depth == 8 the buffer
 * holds 64 int16_t values; for any other depth it is reinterpreted as 64
 * int32_t values.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8) {
        dct8x8_16(coef);
    } else {
        dct8x8_32((int32_t *) coef);
    }
}
173 
174 
175 static void check_idct(void)
176 {
177  LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
178  LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
179  LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
180  LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);
181  LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);
182  LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
183  LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
185  int bit_depth, sz, align, dc;
186  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
187 
188  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
189  ff_h264dsp_init(&h, bit_depth, 1);
190  for (sz = 4; sz <= 8; sz += 4) {
192 
193  if (sz == 4)
194  dct4x4(coef, bit_depth);
195  else
196  dct8x8(coef, bit_depth);
197 
198  for (dc = 0; dc <= 1; dc++) {
199  void (*idct)(uint8_t *, int16_t *, int) = NULL;
200  switch ((sz << 1) | dc) {
201  case (4 << 1) | 0: idct = h.h264_idct_add; break;
202  case (4 << 1) | 1: idct = h.h264_idct_dc_add; break;
203  case (8 << 1) | 0: idct = h.h264_idct8_add; break;
204  case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break;
205  }
206  if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) {
207  for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {
208  uint8_t *dst1 = dst1_base + align;
209  if (dc) {
210  memset(subcoef0, 0, sz * sz * SIZEOF_COEF);
211  memcpy(subcoef0, coef, SIZEOF_COEF);
212  } else {
213  memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
214  }
215  memcpy(dst0, dst, sz * PIXEL_STRIDE);
216  memcpy(dst1, dst, sz * PIXEL_STRIDE);
217  memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);
218  call_ref(dst0, subcoef0, PIXEL_STRIDE);
219  call_new(dst1, subcoef1, PIXEL_STRIDE);
220  if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||
221  memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))
222  fail();
223  bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);
224  }
225  }
226  }
227  }
228  }
229 }
230 
231 static void check_idct_multiple(void)
232 {
233  LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
234  LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
235  LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
236  LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
237  LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
238  LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
239  LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
241  int bit_depth, i, y, func;
242  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
243 
244  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
245  ff_h264dsp_init(&h, bit_depth, 1);
246  for (func = 0; func < 3; func++) {
247  void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
248  const char *name;
249  int sz = 4, intra = 0;
250  int block_offset[16] = { 0 };
251  switch (func) {
252  case 0:
253  idct = h.h264_idct_add16;
254  name = "h264_idct_add16";
255  break;
256  case 1:
258  name = "h264_idct_add16intra";
259  intra = 1;
260  break;
261  case 2:
262  idct = h.h264_idct8_add4;
263  name = "h264_idct8_add4";
264  sz = 8;
265  break;
266  }
267  memset(nnzc, 0, 15 * 8);
268  memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
269  for (i = 0; i < 16 * 16; i += sz * sz) {
270  uint8_t src[8 * 8 * 2];
271  uint8_t dst[8 * 8 * 2];
272  int16_t coef[8 * 8 * 2];
273  int index = i / sz;
274  int block_y = (index / 16) * sz;
275  int block_x = index % 16;
276  int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
277  int nnz = rnd() % 3;
278 
280  if (sz == 4)
281  dct4x4(coef, bit_depth);
282  else
283  dct8x8(coef, bit_depth);
284 
285  for (y = 0; y < sz; y++)
286  memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
287  &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
288 
289  if (nnz > 1)
290  nnz = sz * sz;
291  memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
292  coef, nnz * SIZEOF_COEF);
293 
294  if (intra && nnz == 1)
295  nnz = 0;
296 
297  nnzc[scan8[i / 16]] = nnz;
298  block_offset[i / 16] = offset;
299  }
300 
301  if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
302  memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
303  memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
304  memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
305  memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
306  call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
307  call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
308  if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||
309  memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
310  fail();
311  bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
312  }
313  }
314  }
315 }
316 
317 
318 static void check_loop_filter(void)
319 {
320  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
321  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
322  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
324  int bit_depth;
325  int alphas[36], betas[36];
326  int8_t tc0[36][4];
327 
328  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
329  int alpha, int beta, int8_t *tc0);
330 
331  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
332  int i, j, a, c;
333  uint32_t mask = pixel_mask_lf[bit_depth - 8];
334  ff_h264dsp_init(&h, bit_depth, 1);
335  for (i = 35, a = 255, c = 250; i >= 0; i--) {
336  alphas[i] = a << (bit_depth - 8);
337  betas[i] = (i + 1) / 2 << (bit_depth - 8);
338  tc0[i][0] = tc0[i][3] = (c + 6) / 10;
339  tc0[i][1] = (c + 7) / 15;
340  tc0[i][2] = (c + 9) / 20;
341  a = a*9/10;
342  c = c*9/10;
343  }
344 
345 #define CHECK_LOOP_FILTER(name, align, idc) \
346  do { \
347  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
348  for (j = 0; j < 36; j++) { \
349  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
350  for (i = 0; i < 1024; i+=4) { \
351  AV_WN32A(dst + i, rnd() & mask); \
352  } \
353  memcpy(dst0, dst, 32 * 16 * 2); \
354  memcpy(dst1, dst, 32 * 16 * 2); \
355  \
356  call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
357  call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
358  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
359  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \
360  "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
361  tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
362  fail(); \
363  } \
364  bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \
365  } \
366  } \
367  } while (0)
368 
375 
376  ff_h264dsp_init(&h, bit_depth, 2);
379 #undef CHECK_LOOP_FILTER
380  }
381 }
382 
383 static void check_loop_filter_intra(void)
384 {
385  LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
386  LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
387  LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
389  int bit_depth;
390  int alphas[36], betas[36];
391 
392  declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
393  int alpha, int beta);
394 
395  for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
396  int i, j, a;
397  uint32_t mask = pixel_mask_lf[bit_depth - 8];
398  ff_h264dsp_init(&h, bit_depth, 1);
399  for (i = 35, a = 255; i >= 0; i--) {
400  alphas[i] = a << (bit_depth - 8);
401  betas[i] = (i + 1) / 2 << (bit_depth - 8);
402  a = a*9/10;
403  }
404 
405 #define CHECK_LOOP_FILTER(name, align, idc) \
406  do { \
407  if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
408  for (j = 0; j < 36; j++) { \
409  intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
410  for (i = 0; i < 1024; i+=4) { \
411  AV_WN32A(dst + i, rnd() & mask); \
412  } \
413  memcpy(dst0, dst, 32 * 16 * 2); \
414  memcpy(dst1, dst, 32 * 16 * 2); \
415  \
416  call_ref(dst0 + off, 32, alphas[j], betas[j]); \
417  call_new(dst1 + off, 32, alphas[j], betas[j]); \
418  if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \
419  fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \
420  j, alphas[j], betas[j]); \
421  fail(); \
422  } \
423  bench_new(dst1, 32, alphas[j], betas[j]); \
424  } \
425  } \
426  } while (0)
427 
434 
435  ff_h264dsp_init(&h, bit_depth, 2);
438 #undef CHECK_LOOP_FILTER
439  }
440 }
441 
/*
 * Entry point of the H.264 DSP checks: runs every test group of this file
 * and reports each group under its own name.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}
#define NULL
Definition: coverity.c:32
static void idct(int16_t block[64])
Definition: 4xm.c:164
static void check_idct_multiple(void)
Definition: h264dsp.c:231
static void check_loop_filter_intra(void)
Definition: h264dsp.c:383
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
static void check_idct(void)
Definition: h264dsp.c:175
fg index
static void FUNCC() h264_h_loop_filter_chroma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
#define dct8x8_impl(size, dctcoef)
Definition: h264dsp.c:121
static const uint32_t pixel_mask[3]
Definition: h264dsp.c:31
H.264 DSP functions.
#define report
Definition: checkasm.h:136
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
static void check_loop_filter(void)
Definition: h264dsp.c:318
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
static void FUNCC() h264_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:99
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:81
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:87
#define src
Definition: vp8dsp.c:255
static void FUNCC() h264_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
static void dct8x8(int16_t *coef, int bit_depth)
Definition: h264dsp.c:165
static void FUNCC() h264_h_loop_filter_luma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
static const uint16_t mask[17]
Definition: lzw.c:38
void checkasm_check_h264dsp(void)
Definition: h264dsp.c:442
#define fail()
Definition: checkasm.h:133
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:90
common internal API header
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
static void FUNCC() h264_h_loop_filter_luma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:254
int32_t
#define CHECK_LOOP_FILTER(name, align, idc)
static void FUNCC() h264_h_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:130
static void FUNCC() h264_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:83
#define call_ref(...)
Definition: checkasm.h:139
static void FUNCC() h264_h_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
static void dct4x4(int16_t *coef)
Definition: vp8dsp.c:46
Libavcodec external API header.
#define PIXEL_STRIDE
Definition: h264dsp.c:36
static const int16_t alpha[]
Definition: ilbcdata.h:55
#define SIZEOF_PIXEL
Definition: h264dsp.c:34
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:651
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:93
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:67
#define check_func(func,...)
Definition: checkasm.h:124
#define SIZEOF_COEF
Definition: h264dsp.c:35
static void FUNCC() h264_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
int
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
common internal and external API header
#define rnd()
Definition: checkasm.h:117
static const uint32_t pixel_mask_lf[3]
Definition: h264dsp.c:32
#define dct4x4_impl(size, dctcoef)
Definition: h264dsp.c:60
static void FUNCC() h264_v_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define bench_new(...)
Definition: checkasm.h:271
static void FUNCC() h264_v_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp.c:67
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:85
static void FUNCC() h264_h_loop_filter_chroma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
#define call_new(...)
Definition: checkasm.h:211
int i
Definition: input.c:407
#define randomize_buffers()
Definition: h264dsp.c:38
const char * name
Definition: opengl_enc.c:102