FFmpeg
vp9block.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "avcodec.h"
27 #include "internal.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dec.h"
33 
34 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
35  ptrdiff_t stride, int v)
36 {
37  switch (w) {
38  case 1:
39  do {
40  *ptr = v;
41  ptr += stride;
42  } while (--h);
43  break;
44  case 2: {
45  int v16 = v * 0x0101;
46  do {
47  AV_WN16A(ptr, v16);
48  ptr += stride;
49  } while (--h);
50  break;
51  }
52  case 4: {
53  uint32_t v32 = v * 0x01010101;
54  do {
55  AV_WN32A(ptr, v32);
56  ptr += stride;
57  } while (--h);
58  break;
59  }
60  case 8: {
61 #if HAVE_FAST_64BIT
62  uint64_t v64 = v * 0x0101010101010101ULL;
63  do {
64  AV_WN64A(ptr, v64);
65  ptr += stride;
66  } while (--h);
67 #else
68  uint32_t v32 = v * 0x01010101;
69  do {
70  AV_WN32A(ptr, v32);
71  AV_WN32A(ptr + 4, v32);
72  ptr += stride;
73  } while (--h);
74 #endif
75  break;
76  }
77  }
78 }
79 
80 static void decode_mode(VP9TileData *td)
81 {
82  static const uint8_t left_ctx[N_BS_SIZES] = {
83  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
84  };
85  static const uint8_t above_ctx[N_BS_SIZES] = {
86  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
87  };
88  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
91  };
92  VP9Context *s = td->s;
93  VP9Block *b = td->b;
94  int row = td->row, col = td->col, row7 = td->row7;
95  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
96  int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
97  int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
98  int have_a = row > 0, have_l = col > td->tile_col_start;
99  int vref, filter_id;
100 
101  if (!s->s.h.segmentation.enabled) {
102  b->seg_id = 0;
103  } else if (s->s.h.keyframe || s->s.h.intraonly) {
104  b->seg_id = !s->s.h.segmentation.update_map ? 0 :
105  vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob);
106  } else if (!s->s.h.segmentation.update_map ||
107  (s->s.h.segmentation.temporal &&
109  s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
110  td->left_segpred_ctx[row7]]))) {
111  if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
112  int pred = 8, x;
113  uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
114 
115  if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
116  ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
117  for (y = 0; y < h4; y++) {
118  int idx_base = (y + row) * 8 * s->sb_cols + col;
119  for (x = 0; x < w4; x++)
120  pred = FFMIN(pred, refsegmap[idx_base + x]);
121  }
122  av_assert1(pred < 8);
123  b->seg_id = pred;
124  } else {
125  b->seg_id = 0;
126  }
127 
128  memset(&s->above_segpred_ctx[col], 1, w4);
129  memset(&td->left_segpred_ctx[row7], 1, h4);
130  } else {
132  s->s.h.segmentation.prob);
133 
134  memset(&s->above_segpred_ctx[col], 0, w4);
135  memset(&td->left_segpred_ctx[row7], 0, h4);
136  }
137  if (s->s.h.segmentation.enabled &&
138  (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
139  setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
140  bw4, bh4, 8 * s->sb_cols, b->seg_id);
141  }
142 
143  b->skip = s->s.h.segmentation.enabled &&
144  s->s.h.segmentation.feat[b->seg_id].skip_enabled;
145  if (!b->skip) {
146  int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
147  b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]);
148  td->counts.skip[c][b->skip]++;
149  }
150 
151  if (s->s.h.keyframe || s->s.h.intraonly) {
152  b->intra = 1;
153  } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
154  b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
155  } else {
156  int c, bit;
157 
158  if (have_a && have_l) {
159  c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
160  c += (c == 2);
161  } else {
162  c = have_a ? 2 * s->above_intra_ctx[col] :
163  have_l ? 2 * td->left_intra_ctx[row7] : 0;
164  }
165  bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]);
166  td->counts.intra[c][bit]++;
167  b->intra = !bit;
168  }
169 
170  if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
171  int c;
172  if (have_a) {
173  if (have_l) {
174  c = (s->above_skip_ctx[col] ? max_tx :
175  s->above_txfm_ctx[col]) +
176  (td->left_skip_ctx[row7] ? max_tx :
177  td->left_txfm_ctx[row7]) > max_tx;
178  } else {
179  c = s->above_skip_ctx[col] ? 1 :
180  (s->above_txfm_ctx[col] * 2 > max_tx);
181  }
182  } else if (have_l) {
183  c = td->left_skip_ctx[row7] ? 1 :
184  (td->left_txfm_ctx[row7] * 2 > max_tx);
185  } else {
186  c = 1;
187  }
188  switch (max_tx) {
189  case TX_32X32:
190  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
191  if (b->tx) {
192  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
193  if (b->tx == 2)
194  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
195  }
196  td->counts.tx32p[c][b->tx]++;
197  break;
198  case TX_16X16:
199  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
200  if (b->tx)
201  b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
202  td->counts.tx16p[c][b->tx]++;
203  break;
204  case TX_8X8:
205  b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]);
206  td->counts.tx8p[c][b->tx]++;
207  break;
208  case TX_4X4:
209  b->tx = TX_4X4;
210  break;
211  }
212  } else {
213  b->tx = FFMIN(max_tx, s->s.h.txfmmode);
214  }
215 
216  if (s->s.h.keyframe || s->s.h.intraonly) {
217  uint8_t *a = &s->above_mode_ctx[col * 2];
218  uint8_t *l = &td->left_mode_ctx[(row7) << 1];
219 
220  b->comp = 0;
221  if (b->bs > BS_8x8) {
222  // FIXME the memory storage intermediates here aren't really
223  // necessary, they're just there to make the code slightly
224  // simpler for now
225  b->mode[0] =
227  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
228  if (b->bs != BS_8x4) {
230  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
231  l[0] =
232  a[1] = b->mode[1];
233  } else {
234  l[0] =
235  a[1] =
236  b->mode[1] = b->mode[0];
237  }
238  if (b->bs != BS_4x8) {
239  b->mode[2] =
241  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
242  if (b->bs != BS_8x4) {
244  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
245  l[1] =
246  a[1] = b->mode[3];
247  } else {
248  l[1] =
249  a[1] =
250  b->mode[3] = b->mode[2];
251  }
252  } else {
253  b->mode[2] = b->mode[0];
254  l[1] =
255  a[1] =
256  b->mode[3] = b->mode[1];
257  }
258  } else {
261  b->mode[3] =
262  b->mode[2] =
263  b->mode[1] = b->mode[0];
264  // FIXME this can probably be optimized
265  memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
266  memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
267  }
270  } else if (b->intra) {
271  b->comp = 0;
272  if (b->bs > BS_8x8) {
274  s->prob.p.y_mode[0]);
275  td->counts.y_mode[0][b->mode[0]]++;
276  if (b->bs != BS_8x4) {
278  s->prob.p.y_mode[0]);
279  td->counts.y_mode[0][b->mode[1]]++;
280  } else {
281  b->mode[1] = b->mode[0];
282  }
283  if (b->bs != BS_4x8) {
285  s->prob.p.y_mode[0]);
286  td->counts.y_mode[0][b->mode[2]]++;
287  if (b->bs != BS_8x4) {
289  s->prob.p.y_mode[0]);
290  td->counts.y_mode[0][b->mode[3]]++;
291  } else {
292  b->mode[3] = b->mode[2];
293  }
294  } else {
295  b->mode[2] = b->mode[0];
296  b->mode[3] = b->mode[1];
297  }
298  } else {
299  static const uint8_t size_group[10] = {
300  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
301  };
302  int sz = size_group[b->bs];
303 
305  s->prob.p.y_mode[sz]);
306  b->mode[1] =
307  b->mode[2] =
308  b->mode[3] = b->mode[0];
309  td->counts.y_mode[sz][b->mode[3]]++;
310  }
312  s->prob.p.uv_mode[b->mode[3]]);
313  td->counts.uv_mode[b->mode[3]][b->uvmode]++;
314  } else {
315  static const uint8_t inter_mode_ctx_lut[14][14] = {
316  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
317  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
324  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
325  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
326  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
327  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
328  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
329  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
330  };
331 
332  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
333  av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
334  b->comp = 0;
335  b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
336  } else {
337  // read comp_pred flag
338  if (s->s.h.comppredmode != PRED_SWITCHABLE) {
339  b->comp = s->s.h.comppredmode == PRED_COMPREF;
340  } else {
341  int c;
342 
343  // FIXME add intra as ref=0xff (or -1) to make these easier?
344  if (have_a) {
345  if (have_l) {
346  if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
347  c = 4;
348  } else if (s->above_comp_ctx[col]) {
349  c = 2 + (td->left_intra_ctx[row7] ||
350  td->left_ref_ctx[row7] == s->s.h.fixcompref);
351  } else if (td->left_comp_ctx[row7]) {
352  c = 2 + (s->above_intra_ctx[col] ||
353  s->above_ref_ctx[col] == s->s.h.fixcompref);
354  } else {
355  c = (!s->above_intra_ctx[col] &&
356  s->above_ref_ctx[col] == s->s.h.fixcompref) ^
357  (!td->left_intra_ctx[row7] &&
358  td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
359  }
360  } else {
361  c = s->above_comp_ctx[col] ? 3 :
362  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
363  }
364  } else if (have_l) {
365  c = td->left_comp_ctx[row7] ? 3 :
366  (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
367  } else {
368  c = 1;
369  }
370  b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]);
371  td->counts.comp[c][b->comp]++;
372  }
373 
374  // read actual references
375  // FIXME probably cache a few variables here to prevent repetitive
376  // memory accesses below
377  if (b->comp) { /* two references */
378  int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
379 
380  b->ref[fix_idx] = s->s.h.fixcompref;
381  // FIXME can this codeblob be replaced by some sort of LUT?
382  if (have_a) {
383  if (have_l) {
384  if (s->above_intra_ctx[col]) {
385  if (td->left_intra_ctx[row7]) {
386  c = 2;
387  } else {
388  c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
389  }
390  } else if (td->left_intra_ctx[row7]) {
391  c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
392  } else {
393  int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
394 
395  if (refl == refa && refa == s->s.h.varcompref[1]) {
396  c = 0;
397  } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
398  if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
399  (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
400  c = 4;
401  } else {
402  c = (refa == refl) ? 3 : 1;
403  }
404  } else if (!td->left_comp_ctx[row7]) {
405  if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
406  c = 1;
407  } else {
408  c = (refl == s->s.h.varcompref[1] &&
409  refa != s->s.h.varcompref[1]) ? 2 : 4;
410  }
411  } else if (!s->above_comp_ctx[col]) {
412  if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
413  c = 1;
414  } else {
415  c = (refa == s->s.h.varcompref[1] &&
416  refl != s->s.h.varcompref[1]) ? 2 : 4;
417  }
418  } else {
419  c = (refl == refa) ? 4 : 2;
420  }
421  }
422  } else {
423  if (s->above_intra_ctx[col]) {
424  c = 2;
425  } else if (s->above_comp_ctx[col]) {
426  c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
427  } else {
428  c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
429  }
430  }
431  } else if (have_l) {
432  if (td->left_intra_ctx[row7]) {
433  c = 2;
434  } else if (td->left_comp_ctx[row7]) {
435  c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
436  } else {
437  c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
438  }
439  } else {
440  c = 2;
441  }
442  bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
443  b->ref[var_idx] = s->s.h.varcompref[bit];
444  td->counts.comp_ref[c][bit]++;
445  } else /* single reference */ {
446  int bit, c;
447 
448  if (have_a && !s->above_intra_ctx[col]) {
449  if (have_l && !td->left_intra_ctx[row7]) {
450  if (td->left_comp_ctx[row7]) {
451  if (s->above_comp_ctx[col]) {
452  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
453  !s->above_ref_ctx[col]);
454  } else {
455  c = (3 * !s->above_ref_ctx[col]) +
456  (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
457  }
458  } else if (s->above_comp_ctx[col]) {
459  c = (3 * !td->left_ref_ctx[row7]) +
460  (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
461  } else {
462  c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
463  }
464  } else if (s->above_intra_ctx[col]) {
465  c = 2;
466  } else if (s->above_comp_ctx[col]) {
467  c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
468  } else {
469  c = 4 * (!s->above_ref_ctx[col]);
470  }
471  } else if (have_l && !td->left_intra_ctx[row7]) {
472  if (td->left_intra_ctx[row7]) {
473  c = 2;
474  } else if (td->left_comp_ctx[row7]) {
475  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
476  } else {
477  c = 4 * (!td->left_ref_ctx[row7]);
478  }
479  } else {
480  c = 2;
481  }
482  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
483  td->counts.single_ref[c][0][bit]++;
484  if (!bit) {
485  b->ref[0] = 0;
486  } else {
487  // FIXME can this codeblob be replaced by some sort of LUT?
488  if (have_a) {
489  if (have_l) {
490  if (td->left_intra_ctx[row7]) {
491  if (s->above_intra_ctx[col]) {
492  c = 2;
493  } else if (s->above_comp_ctx[col]) {
494  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
495  s->above_ref_ctx[col] == 1);
496  } else if (!s->above_ref_ctx[col]) {
497  c = 3;
498  } else {
499  c = 4 * (s->above_ref_ctx[col] == 1);
500  }
501  } else if (s->above_intra_ctx[col]) {
502  if (td->left_intra_ctx[row7]) {
503  c = 2;
504  } else if (td->left_comp_ctx[row7]) {
505  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
506  td->left_ref_ctx[row7] == 1);
507  } else if (!td->left_ref_ctx[row7]) {
508  c = 3;
509  } else {
510  c = 4 * (td->left_ref_ctx[row7] == 1);
511  }
512  } else if (s->above_comp_ctx[col]) {
513  if (td->left_comp_ctx[row7]) {
514  if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
515  c = 3 * (s->s.h.fixcompref == 1 ||
516  td->left_ref_ctx[row7] == 1);
517  } else {
518  c = 2;
519  }
520  } else if (!td->left_ref_ctx[row7]) {
521  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
522  s->above_ref_ctx[col] == 1);
523  } else {
524  c = 3 * (td->left_ref_ctx[row7] == 1) +
525  (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
526  }
527  } else if (td->left_comp_ctx[row7]) {
528  if (!s->above_ref_ctx[col]) {
529  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
530  td->left_ref_ctx[row7] == 1);
531  } else {
532  c = 3 * (s->above_ref_ctx[col] == 1) +
533  (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
534  }
535  } else if (!s->above_ref_ctx[col]) {
536  if (!td->left_ref_ctx[row7]) {
537  c = 3;
538  } else {
539  c = 4 * (td->left_ref_ctx[row7] == 1);
540  }
541  } else if (!td->left_ref_ctx[row7]) {
542  c = 4 * (s->above_ref_ctx[col] == 1);
543  } else {
544  c = 2 * (td->left_ref_ctx[row7] == 1) +
545  2 * (s->above_ref_ctx[col] == 1);
546  }
547  } else {
548  if (s->above_intra_ctx[col] ||
549  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
550  c = 2;
551  } else if (s->above_comp_ctx[col]) {
552  c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
553  } else {
554  c = 4 * (s->above_ref_ctx[col] == 1);
555  }
556  }
557  } else if (have_l) {
558  if (td->left_intra_ctx[row7] ||
559  (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
560  c = 2;
561  } else if (td->left_comp_ctx[row7]) {
562  c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
563  } else {
564  c = 4 * (td->left_ref_ctx[row7] == 1);
565  }
566  } else {
567  c = 2;
568  }
569  bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
570  td->counts.single_ref[c][1][bit]++;
571  b->ref[0] = 1 + bit;
572  }
573  }
574  }
575 
576  if (b->bs <= BS_8x8) {
577  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
578  b->mode[0] =
579  b->mode[1] =
580  b->mode[2] =
581  b->mode[3] = ZEROMV;
582  } else {
583  static const uint8_t off[10] = {
584  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
585  };
586 
587  // FIXME this needs to use the LUT tables from find_ref_mvs
588  // because not all are -1,0/0,-1
589  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
590  [td->left_mode_ctx[row7 + off[b->bs]]];
591 
593  s->prob.p.mv_mode[c]);
594  b->mode[1] =
595  b->mode[2] =
596  b->mode[3] = b->mode[0];
597  td->counts.mv_mode[c][b->mode[0] - 10]++;
598  }
599  }
600 
601  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
602  int c;
603 
604  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
605  if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
606  c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
607  td->left_filter_ctx[row7] : 3;
608  } else {
609  c = s->above_filter_ctx[col];
610  }
611  } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
612  c = td->left_filter_ctx[row7];
613  } else {
614  c = 3;
615  }
616 
617  filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree,
618  s->prob.p.filter[c]);
619  td->counts.filter[c][filter_id]++;
620  b->filter = ff_vp9_filter_lut[filter_id];
621  } else {
622  b->filter = s->s.h.filtermode;
623  }
624 
625  if (b->bs > BS_8x8) {
626  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
627 
629  s->prob.p.mv_mode[c]);
630  td->counts.mv_mode[c][b->mode[0] - 10]++;
631  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
632 
633  if (b->bs != BS_8x4) {
635  s->prob.p.mv_mode[c]);
636  td->counts.mv_mode[c][b->mode[1] - 10]++;
637  ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
638  } else {
639  b->mode[1] = b->mode[0];
640  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
641  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
642  }
643 
644  if (b->bs != BS_4x8) {
646  s->prob.p.mv_mode[c]);
647  td->counts.mv_mode[c][b->mode[2] - 10]++;
648  ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
649 
650  if (b->bs != BS_8x4) {
652  s->prob.p.mv_mode[c]);
653  td->counts.mv_mode[c][b->mode[3] - 10]++;
654  ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
655  } else {
656  b->mode[3] = b->mode[2];
657  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
658  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
659  }
660  } else {
661  b->mode[2] = b->mode[0];
662  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
663  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
664  b->mode[3] = b->mode[1];
665  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
666  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
667  }
668  } else {
669  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
670  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
671  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
672  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
673  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
674  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
675  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
676  }
677 
678  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
679  }
680 
681 #if HAVE_FAST_64BIT
682 #define SPLAT_CTX(var, val, n) \
683  switch (n) { \
684  case 1: var = val; break; \
685  case 2: AV_WN16A(&var, val * 0x0101); break; \
686  case 4: AV_WN32A(&var, val * 0x01010101); break; \
687  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
688  case 16: { \
689  uint64_t v64 = val * 0x0101010101010101ULL; \
690  AV_WN64A( &var, v64); \
691  AV_WN64A(&((uint8_t *) &var)[8], v64); \
692  break; \
693  } \
694  }
695 #else
696 #define SPLAT_CTX(var, val, n) \
697  switch (n) { \
698  case 1: var = val; break; \
699  case 2: AV_WN16A(&var, val * 0x0101); break; \
700  case 4: AV_WN32A(&var, val * 0x01010101); break; \
701  case 8: { \
702  uint32_t v32 = val * 0x01010101; \
703  AV_WN32A( &var, v32); \
704  AV_WN32A(&((uint8_t *) &var)[4], v32); \
705  break; \
706  } \
707  case 16: { \
708  uint32_t v32 = val * 0x01010101; \
709  AV_WN32A( &var, v32); \
710  AV_WN32A(&((uint8_t *) &var)[4], v32); \
711  AV_WN32A(&((uint8_t *) &var)[8], v32); \
712  AV_WN32A(&((uint8_t *) &var)[12], v32); \
713  break; \
714  } \
715  }
716 #endif
717 
718  switch (ff_vp9_bwh_tab[1][b->bs][0]) {
719 #define SET_CTXS(perf, dir, off, n) \
720  do { \
721  SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \
722  SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \
723  SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
724  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
725  SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \
726  SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \
727  SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \
728  if (!b->intra) { \
729  SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
730  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
731  SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
732  } \
733  } \
734  } \
735  } while (0)
736  case 1: SET_CTXS(s, above, col, 1); break;
737  case 2: SET_CTXS(s, above, col, 2); break;
738  case 4: SET_CTXS(s, above, col, 4); break;
739  case 8: SET_CTXS(s, above, col, 8); break;
740  }
741  switch (ff_vp9_bwh_tab[1][b->bs][1]) {
742  case 1: SET_CTXS(td, left, row7, 1); break;
743  case 2: SET_CTXS(td, left, row7, 2); break;
744  case 4: SET_CTXS(td, left, row7, 4); break;
745  case 8: SET_CTXS(td, left, row7, 8); break;
746  }
747 #undef SPLAT_CTX
748 #undef SET_CTXS
749 
750  if (!s->s.h.keyframe && !s->s.h.intraonly) {
751  if (b->bs > BS_8x8) {
752  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
753 
754  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
755  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
756  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
757  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
758  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
759  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
760  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
761  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
762  } else {
763  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
764 
765  for (n = 0; n < w4 * 2; n++) {
766  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
767  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
768  }
769  for (n = 0; n < h4 * 2; n++) {
770  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
771  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
772  }
773  }
774  }
775 
776  // FIXME kinda ugly
777  for (y = 0; y < h4; y++) {
778  int x, o = (row + y) * s->sb_cols * 8 + col;
779  VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
780 
781  if (b->intra) {
782  for (x = 0; x < w4; x++) {
783  mv[x].ref[0] =
784  mv[x].ref[1] = -1;
785  }
786  } else if (b->comp) {
787  for (x = 0; x < w4; x++) {
788  mv[x].ref[0] = b->ref[0];
789  mv[x].ref[1] = b->ref[1];
790  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
791  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
792  }
793  } else {
794  for (x = 0; x < w4; x++) {
795  mv[x].ref[0] = b->ref[0];
796  mv[x].ref[1] = -1;
797  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
798  }
799  }
800  }
801 }
802 
803 // FIXME merge cnt/eob arguments?
804 static av_always_inline int
805 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
806  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
807  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
808  int nnz, const int16_t *scan, const int16_t (*nb)[2],
809  const int16_t *band_counts, int16_t *qmul)
810 {
811  int i = 0, band = 0, band_left = band_counts[band];
812  const uint8_t *tp = p[0][nnz];
813  uint8_t cache[1024];
814 
815  do {
816  int val, rc;
817 
818  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
819  eob[band][nnz][val]++;
820  if (!val)
821  break;
822 
823 skip_eob:
824  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
825  cnt[band][nnz][0]++;
826  if (!--band_left)
827  band_left = band_counts[++band];
828  cache[scan[i]] = 0;
829  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
830  tp = p[band][nnz];
831  if (++i == n_coeffs)
832  break; //invalid input; blocks should end with EOB
833  goto skip_eob;
834  }
835 
836  rc = scan[i];
837  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
838  cnt[band][nnz][1]++;
839  val = 1;
840  cache[rc] = 1;
841  } else {
842  cnt[band][nnz][2]++;
843  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
844  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
845  cache[rc] = val = 2;
846  } else {
847  val = 3 + vp56_rac_get_prob(c, tp[5]);
848  cache[rc] = 3;
849  }
850  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
851  cache[rc] = 4;
852  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
853  val = vp56_rac_get_prob(c, 159) + 5;
854  } else {
855  val = (vp56_rac_get_prob(c, 165) << 1) + 7;
856  val += vp56_rac_get_prob(c, 145);
857  }
858  } else { // cat 3-6
859  cache[rc] = 5;
860  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
861  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
862  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
863  val += (vp56_rac_get_prob(c, 148) << 1);
864  val += vp56_rac_get_prob(c, 140);
865  } else {
866  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
867  val += (vp56_rac_get_prob(c, 155) << 2);
868  val += (vp56_rac_get_prob(c, 140) << 1);
869  val += vp56_rac_get_prob(c, 135);
870  }
871  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
872  val = (vp56_rac_get_prob(c, 180) << 4) + 35;
873  val += (vp56_rac_get_prob(c, 157) << 3);
874  val += (vp56_rac_get_prob(c, 141) << 2);
875  val += (vp56_rac_get_prob(c, 134) << 1);
876  val += vp56_rac_get_prob(c, 130);
877  } else {
878  val = 67;
879  if (!is8bitsperpixel) {
880  if (bpp == 12) {
881  val += vp56_rac_get_prob(c, 255) << 17;
882  val += vp56_rac_get_prob(c, 255) << 16;
883  }
884  val += (vp56_rac_get_prob(c, 255) << 15);
885  val += (vp56_rac_get_prob(c, 255) << 14);
886  }
887  val += (vp56_rac_get_prob(c, 254) << 13);
888  val += (vp56_rac_get_prob(c, 254) << 12);
889  val += (vp56_rac_get_prob(c, 254) << 11);
890  val += (vp56_rac_get_prob(c, 252) << 10);
891  val += (vp56_rac_get_prob(c, 249) << 9);
892  val += (vp56_rac_get_prob(c, 243) << 8);
893  val += (vp56_rac_get_prob(c, 230) << 7);
894  val += (vp56_rac_get_prob(c, 196) << 6);
895  val += (vp56_rac_get_prob(c, 177) << 5);
896  val += (vp56_rac_get_prob(c, 153) << 4);
897  val += (vp56_rac_get_prob(c, 140) << 3);
898  val += (vp56_rac_get_prob(c, 133) << 2);
899  val += (vp56_rac_get_prob(c, 130) << 1);
900  val += vp56_rac_get_prob(c, 129);
901  }
902  }
903  }
904 #define STORE_COEF(c, i, v) do { \
905  if (is8bitsperpixel) { \
906  c[i] = v; \
907  } else { \
908  AV_WN32A(&c[i * 2], v); \
909  } \
910 } while (0)
911  if (!--band_left)
912  band_left = band_counts[++band];
913  if (is_tx32x32)
914  STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
915  else
916  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
917  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
918  tp = p[band][nnz];
919  } while (++i < n_coeffs);
920 
921  return i;
922 }
923 
924 static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
925  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
926  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
927  const int16_t (*nb)[2], const int16_t *band_counts,
928  int16_t *qmul)
929 {
930  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
931  nnz, scan, nb, band_counts, qmul);
932 }
933 
934 static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
935  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
936  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
937  const int16_t (*nb)[2], const int16_t *band_counts,
938  int16_t *qmul)
939 {
940  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
941  nnz, scan, nb, band_counts, qmul);
942 }
943 
944 static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
945  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
946  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
947  const int16_t (*nb)[2], const int16_t *band_counts,
948  int16_t *qmul)
949 {
950  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
951  nnz, scan, nb, band_counts, qmul);
952 }
953 
954 static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
955  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
956  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
957  const int16_t (*nb)[2], const int16_t *band_counts,
958  int16_t *qmul)
959 {
960  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
961  nnz, scan, nb, band_counts, qmul);
962 }
963 
/*
 * Decode the coefficient tokens of the current block (td->b): first the luma
 * plane, then both chroma planes.
 *
 * is8bitsperpixel selects the *_8bpp (non-zero) or *_16bpp (zero) per-transform
 * decoders and scales the offsets into td->block / td->uvblock by 1 or 2.
 *
 * Returns 1 if at least one per-transform decode call returned a non-zero
 * value, 0 otherwise.
 */
964 static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
965 {
966  VP9Context *s = td->s;
967  VP9Block *b = td->b;
968  int row = td->row, col = td->col;
969  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
970  unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
971  unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
/* w4/h4 are the ff_vp9_bwh_tab[1] block dimensions doubled; end_x/end_y are
 * the same extents limited by twice the distance to s->cols / s->rows. */
972  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
973  int end_x = FFMIN(2 * (s->cols - col), w4);
974  int end_y = FFMIN(2 * (s->rows - row), h4);
975  int n, pl, x, y, ret;
976  int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
/* luma scan tables are offset by 4 when s->s.h.lossless is set */
977  int tx = 4 * s->s.h.lossless + b->tx;
978  const int16_t * const *yscans = ff_vp9_scans[tx];
979  const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
980  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
981  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
/* above/left non-zero contexts for luma; re-pointed at the chroma contexts
 * inside the plane loop further down */
982  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
983  uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
/* one band_counts row per transform size; indexed by b->tx for luma and by
 * b->uvtx for chroma and handed to the per-transform decoders */
984  static const int16_t band_counts[4][8] = {
985  { 1, 2, 3, 4, 3, 16 - 13 },
986  { 1, 2, 3, 4, 11, 64 - 21 },
987  { 1, 2, 3, 4, 11, 256 - 21 },
988  { 1, 2, 3, 4, 11, 1024 - 21 },
989  };
990  const int16_t *y_band_counts = band_counts[b->tx];
991  const int16_t *uv_band_counts = band_counts[b->uvtx];
992  int bytesperpixel = is8bitsperpixel ? 1 : 2;
993  int total_coeff = 0;
994 
/* MERGE: for every group of `step` context bytes, replace the first byte of
 * the group by a 0/1 flag telling whether the value read by rd() at that
 * position is non-zero. MERGE_CTX applies it to the left (l) and the above (a)
 * context arrays. */
995 #define MERGE(la, end, step, rd) \
996  for (n = 0; n < end; n += step) \
997  la[n] = !!rd(&la[n])
998 #define MERGE_CTX(step, rd) \
999  do { \
1000  MERGE(l, end_y, step, rd); \
1001  MERGE(a, end_x, step, rd); \
1002  } while (0)
1003 
/* DECODE_Y_COEF_LOOP: visit the luma transform blocks row by row, decode each
 * one with the 8bpp or 16bpp variant of decode_coeffs_b<v>, store whether the
 * call returned non-zero in a[x] and l[y] and accumulate it in total_coeff.
 * The returned value itself is stored in td->eob[n] (through AV_WN16A when
 * step >= 4). */
1004 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1005  for (n = 0, y = 0; y < end_y; y += step) { \
1006  for (x = 0; x < end_x; x += step, n += step * step) { \
1007  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1008  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1009  (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1010  c, e, p, a[x] + l[y], yscans[txtp], \
1011  ynbs[txtp], y_band_counts, qmul[0]); \
1012  a[x] = l[y] = !!ret; \
1013  total_coeff |= !!ret; \
1014  if (step >= 4) { \
1015  AV_WN16A(&td->eob[n], ret); \
1016  } else { \
1017  td->eob[n] = ret; \
1018  } \
1019  } \
1020  }
1021 
/* SPLAT: copy the flag held in the first byte of each `step`-sized group to
 * the remaining bytes of that group. For step == 2 this is a single byte
 * copy. For step 4 and 8, whole words are written when `cond` holds;
 * otherwise memset() is used with a length capped by `end`. SPLAT_CTX passes
 * end_x == w4 / end_y == h4 as `cond`. */
1022 #define SPLAT(la, end, step, cond) \
1023  if (step == 2) { \
1024  for (n = 1; n < end; n += step) \
1025  la[n] = la[n - 1]; \
1026  } else if (step == 4) { \
1027  if (cond) { \
1028  for (n = 0; n < end; n += step) \
1029  AV_WN32A(&la[n], la[n] * 0x01010101); \
1030  } else { \
1031  for (n = 0; n < end; n += step) \
1032  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1033  } \
1034  } else /* step == 8 */ { \
1035  if (cond) { \
1036  if (HAVE_FAST_64BIT) { \
1037  for (n = 0; n < end; n += step) \
1038  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1039  } else { \
1040  for (n = 0; n < end; n += step) { \
1041  uint32_t v32 = la[n] * 0x01010101; \
1042  AV_WN32A(&la[n], v32); \
1043  AV_WN32A(&la[n + 4], v32); \
1044  } \
1045  } \
1046  } else { \
1047  for (n = 0; n < end; n += step) \
1048  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1049  } \
1050  }
1051 #define SPLAT_CTX(step) \
1052  do { \
1053  SPLAT(a, end_x, step, end_x == w4); \
1054  SPLAT(l, end_y, step, end_y == h4); \
1055  } while (0)
1056 
1057  /* y tokens */
/* For TX_4X4 the mode index is the transform block index n when
 * b->bs > BS_8x8 and 0 otherwise; larger transforms always use b->mode[0].
 * Only TX_32X32 uses the decode_coeffs_b32_* decoders. */
1058  switch (b->tx) {
1059  case TX_4X4:
1060  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1061  break;
1062  case TX_8X8:
1063  MERGE_CTX(2, AV_RN16A);
1064  DECODE_Y_COEF_LOOP(2, 0,);
1065  SPLAT_CTX(2);
1066  break;
1067  case TX_16X16:
1068  MERGE_CTX(4, AV_RN32A);
1069  DECODE_Y_COEF_LOOP(4, 0,);
1070  SPLAT_CTX(4);
1071  break;
1072  case TX_32X32:
1073  MERGE_CTX(8, AV_RN64A);
1074  DECODE_Y_COEF_LOOP(8, 0, 32);
1075  SPLAT_CTX(8);
1076  break;
1077  }
1078 
/* DECODE_UV_COEF_LOOP: chroma counterpart of DECODE_Y_COEF_LOOP. It always
 * uses uvscan/uvnb and qmul[1], reads coefficients into td->uvblock[pl] and
 * stores the returned value in td->uveob[pl][n]. */
1079 #define DECODE_UV_COEF_LOOP(step, v) \
1080  for (n = 0, y = 0; y < end_y; y += step) { \
1081  for (x = 0; x < end_x; x += step, n += step * step) { \
1082  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1083  (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1084  16 * step * step, c, e, p, a[x] + l[y], \
1085  uvscan, uvnb, uv_band_counts, qmul[1]); \
1086  a[x] = l[y] = !!ret; \
1087  total_coeff |= !!ret; \
1088  if (step >= 4) { \
1089  AV_WN16A(&td->uveob[pl][n], ret); \
1090  } else { \
1091  td->uveob[pl][n] = ret; \
1092  } \
1093  } \
1094  }
1095 
/* switch to the uv probability/count tables (indexed by b->uvtx) and scale
 * the block extents by the subsampling shifts s->ss_h / s->ss_v */
1096  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1097  c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1098  e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1099  w4 >>= s->ss_h;
1100  end_x >>= s->ss_h;
1101  h4 >>= s->ss_v;
1102  end_y >>= s->ss_v;
/* both chroma planes share the tables above but have their own contexts */
1103  for (pl = 0; pl < 2; pl++) {
1104  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1105  l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1106  switch (b->uvtx) {
1107  case TX_4X4:
1108  DECODE_UV_COEF_LOOP(1,);
1109  break;
1110  case TX_8X8:
1111  MERGE_CTX(2, AV_RN16A);
1112  DECODE_UV_COEF_LOOP(2,);
1113  SPLAT_CTX(2);
1114  break;
1115  case TX_16X16:
1116  MERGE_CTX(4, AV_RN32A);
1117  DECODE_UV_COEF_LOOP(4,);
1118  SPLAT_CTX(4);
1119  break;
1120  case TX_32X32:
1121  MERGE_CTX(8, AV_RN64A);
1122  DECODE_UV_COEF_LOOP(8, 32);
1123  SPLAT_CTX(8);
1124  break;
1125  }
1126  }
1127 
1128  return total_coeff;
1129 }
1130 
1132 {
1133  return decode_coeffs(td, 1);
1134 }
1135 
1137 {
1138  return decode_coeffs(td, 0);
1139 }
1140 
1141 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1142  int row_and_7, int col_and_7,
1143  int w, int h, int col_end, int row_end,
1144  enum TxfmMode tx, int skip_inter)
1145 {
1146  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1147  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1148 
1149  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1150  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1151  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1152  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1153 
1154  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1155  // edges. This means that for UV, we work on two subsampled blocks at
1156  // a time, and we only use the topleft block's mode information to set
1157  // things like block strength. Thus, for any block size smaller than
1158  // 16x16, ignore the odd portion of the block.
1159  if (tx == TX_4X4 && (ss_v | ss_h)) {
1160  if (h == ss_v) {
1161  if (row_and_7 & 1)
1162  return;
1163  if (!row_end)
1164  h += 1;
1165  }
1166  if (w == ss_h) {
1167  if (col_and_7 & 1)
1168  return;
1169  if (!col_end)
1170  w += 1;
1171  }
1172  }
1173 
1174  if (tx == TX_4X4 && !skip_inter) {
1175  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1176  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1177  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1178 
1179  for (y = row_and_7; y < h + row_and_7; y++) {
1180  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1181 
1182  mask[0][y][1] |= m_row_8;
1183  mask[0][y][2] |= m_row_4;
1184  // for odd lines, if the odd col is not being filtered,
1185  // skip odd row also:
1186  // .---. <-- a
1187  // | |
1188  // |___| <-- b
1189  // ^ ^
1190  // c d
1191  //
1192  // if a/c are even row/col and b/d are odd, and d is skipped,
1193  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1194  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1195  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1196  } else {
1197  mask[1][y][col_mask_id] |= m_col;
1198  }
1199  if (!ss_h)
1200  mask[0][y][3] |= m_col;
1201  if (!ss_v) {
1202  if (ss_h && (col_end & 1))
1203  mask[1][y][3] |= (t << (w - 1)) - t;
1204  else
1205  mask[1][y][3] |= m_col;
1206  }
1207  }
1208  } else {
1209  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1210 
1211  if (!skip_inter) {
1212  int mask_id = (tx == TX_8X8);
1213  int l2 = tx + ss_h - 1, step1d;
1214  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1215  int m_row = m_col & masks[l2];
1216 
1217  // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1218  // 8wd loopfilter to prevent going off the visible edge.
1219  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1220  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1221  int m_row_8 = m_row - m_row_16;
1222 
1223  for (y = row_and_7; y < h + row_and_7; y++) {
1224  mask[0][y][0] |= m_row_16;
1225  mask[0][y][1] |= m_row_8;
1226  }
1227  } else {
1228  for (y = row_and_7; y < h + row_and_7; y++)
1229  mask[0][y][mask_id] |= m_row;
1230  }
1231 
1232  l2 = tx + ss_v - 1;
1233  step1d = 1 << l2;
1234  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1235  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1236  mask[1][y][0] |= m_col;
1237  if (y - row_and_7 == h - 1)
1238  mask[1][y][1] |= m_col;
1239  } else {
1240  for (y = row_and_7; y < h + row_and_7; y += step1d)
1241  mask[1][y][mask_id] |= m_col;
1242  }
1243  } else if (tx != TX_4X4) {
1244  int mask_id;
1245 
1246  mask_id = (tx == TX_8X8) || (h == ss_v);
1247  mask[1][row_and_7][mask_id] |= m_col;
1248  mask_id = (tx == TX_8X8) || (w == ss_h);
1249  for (y = row_and_7; y < h + row_and_7; y++)
1250  mask[0][y][mask_id] |= t;
1251  } else {
1252  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1253 
1254  for (y = row_and_7; y < h + row_and_7; y++) {
1255  mask[0][y][2] |= t4;
1256  mask[0][y][1] |= t8;
1257  }
1258  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1259  }
1260  }
1261 }
1262 
1263 void ff_vp9_decode_block(VP9TileData *td, int row, int col,
1264  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1265  enum BlockLevel bl, enum BlockPartition bp)
1266 {
1267  VP9Context *s = td->s;
1268  VP9Block *b = td->b;
1269  enum BlockSize bs = bl * 3 + bp;
1270  int bytesperpixel = s->bytesperpixel;
1271  int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1272  int emu[2];
1273  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1274 
1275  td->row = row;
1276  td->row7 = row & 7;
1277  td->col = col;
1278  td->col7 = col & 7;
1279 
1280  td->min_mv.x = -(128 + col * 64);
1281  td->min_mv.y = -(128 + row * 64);
1282  td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1283  td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1284 
1285  if (s->pass < 2) {
1286  b->bs = bs;
1287  b->bl = bl;
1288  b->bp = bp;
1289  decode_mode(td);
1290  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1291  (s->ss_v && h4 * 2 == (1 << b->tx)));
1292 
1293  if (!b->skip) {
1294  int has_coeffs;
1295 
1296  if (bytesperpixel == 1) {
1297  has_coeffs = decode_coeffs_8bpp(td);
1298  } else {
1299  has_coeffs = decode_coeffs_16bpp(td);
1300  }
1301  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1302  b->skip = 1;
1303  memset(&s->above_skip_ctx[col], 1, w4);
1304  memset(&td->left_skip_ctx[td->row7], 1, h4);
1305  }
1306  } else {
1307  int row7 = td->row7;
1308 
1309 #define SPLAT_ZERO_CTX(v, n) \
1310  switch (n) { \
1311  case 1: v = 0; break; \
1312  case 2: AV_ZERO16(&v); break; \
1313  case 4: AV_ZERO32(&v); break; \
1314  case 8: AV_ZERO64(&v); break; \
1315  case 16: AV_ZERO128(&v); break; \
1316  }
1317 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1318  do { \
1319  SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1320  if (s->ss_##dir2) { \
1321  SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1322  SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1323  } else { \
1324  SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1325  SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1326  } \
1327  } while (0)
1328 
1329  switch (w4) {
1330  case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1331  case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1332  case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1333  case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1334  }
1335  switch (h4) {
1336  case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1337  case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1338  case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1339  case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1340  }
1341  }
1342 
1343  if (s->pass == 1) {
1344  s->td[0].b++;
1345  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1346  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1347  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1348  s->td[0].eob += 4 * w4 * h4;
1349  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1350  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1351 
1352  return;
1353  }
1354  }
1355 
1356  // emulated overhangs if the stride of the target buffer can't hold. This
1357  // makes it possible to support emu-edge and so on even if we have large block
1358  // overhangs
1359  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1360  (row + h4) > s->rows;
1361  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1362  (row + h4) > s->rows;
1363  if (emu[0]) {
1364  td->dst[0] = td->tmp_y;
1365  td->y_stride = 128;
1366  } else {
1367  td->dst[0] = f->data[0] + yoff;
1368  td->y_stride = f->linesize[0];
1369  }
1370  if (emu[1]) {
1371  td->dst[1] = td->tmp_uv[0];
1372  td->dst[2] = td->tmp_uv[1];
1373  td->uv_stride = 128;
1374  } else {
1375  td->dst[1] = f->data[1] + uvoff;
1376  td->dst[2] = f->data[2] + uvoff;
1377  td->uv_stride = f->linesize[1];
1378  }
1379  if (b->intra) {
1380  if (s->s.h.bpp > 8) {
1381  ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1382  } else {
1383  ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1384  }
1385  } else {
1386  if (s->s.h.bpp > 8) {
1388  } else {
1390  }
1391  }
1392  if (emu[0]) {
1393  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1394 
1395  for (n = 0; o < w; n++) {
1396  int bw = 64 >> n;
1397 
1398  av_assert2(n <= 4);
1399  if (w & bw) {
1400  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1401  td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1402  o += bw;
1403  }
1404  }
1405  }
1406  if (emu[1]) {
1407  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1408  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1409 
1410  for (n = s->ss_h; o < w; n++) {
1411  int bw = 64 >> n;
1412 
1413  av_assert2(n <= 4);
1414  if (w & bw) {
1415  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1416  td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1417  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1418  td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1419  o += bw;
1420  }
1421  }
1422  }
1423 
1424  // pick filter level and find edges to apply filter to
1425  if (s->s.h.filter.level &&
1426  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1427  [b->mode[3] != ZEROMV]) > 0) {
1428  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1429  int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1430 
1431  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1432  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1433  if (s->ss_h || s->ss_v)
1434  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1435  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1436  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1437  b->uvtx, skip_inter);
1438  }
1439 
1440  if (s->pass == 2) {
1441  s->td[0].b++;
1442  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1443  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1444  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1445  s->td[0].eob += 4 * w4 * h4;
1446  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1447  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1448  }
1449 }
ff_vp9_inter_recon_8bpp
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:636
DECODE_Y_COEF_LOOP
#define DECODE_Y_COEF_LOOP(step, mode_index, v)
stride
int stride
Definition: mace.c:144
td
#define td
Definition: regdef.h:70
decode_coeffs_b32_8bpp
static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:934
ff_vp9_default_kf_uvmode_probs
const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]
Definition: vp9data.c:201
PRED_SWITCHABLE
@ PRED_SWITCHABLE
Definition: vp9shared.h:51
ff_vp9_filter_tree
const int8_t ff_vp9_filter_tree[2][2]
Definition: vp9data.c:220
n
int n
Definition: avisynth_c.h:760
SET_CTXS
#define SET_CTXS(perf, dir, off, n)
mv
static const int8_t mv[256][2]
Definition: 4xm.c:77
decode_coeffs
static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
Definition: vp9block.c:964
PRED_COMPREF
@ PRED_COMPREF
Definition: vp9shared.h:50
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
w
uint8_t w
Definition: llviddspenc.c:38
BlockPartition
BlockPartition
Definition: vp9shared.h:34
internal.h
b
#define b
Definition: input.c:41
REF_FRAME_SEGMAP
#define REF_FRAME_SEGMAP
Definition: vp9shared.h:165
AV_WN32A
#define AV_WN32A(p, v)
Definition: intreadwrite.h:538
VP9Filter
Definition: vp9dec.h:73
BS_4x8
@ BS_4x8
Definition: vp9shared.h:89
decode_coeffs_b_16bpp
static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:944
ff_thread_await_progress
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before ff_thread_await_progress() has been called on them. reget_buffer() and buffer age optimizations no longer work. *The contents of buffers must not be written to after ff_thread_report_progress() has been called on them. This includes draw_edges(). Porting codecs to frame threading
FILTER_SWITCHABLE
@ FILTER_SWITCHABLE
Definition: vp9.h:70
VP9Block
Definition: vp9dec.h:79
decode_mode
static void decode_mode(VP9TileData *td)
Definition: vp9block.c:80
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:58
vp56_rac_get_prob_branchy
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
Definition: vp56.h:285
TX_SWITCHABLE
@ TX_SWITCHABLE
Definition: vp9.h:33
vp8_rac_get
static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
Definition: vp56.h:322
ff_vp9_intramode_tree
const int8_t ff_vp9_intramode_tree[9][2]
Definition: vp9data.c:75
mask_edges
static av_always_inline void mask_edges(uint8_t(*mask)[8][4], int ss_h, int ss_v, int row_and_7, int col_and_7, int w, int h, int col_end, int row_end, enum TxfmMode tx, int skip_inter)
Definition: vp9block.c:1141
ZEROMV
@ ZEROMV
Definition: vp9shared.h:44
avassert.h
ff_vp9_default_kf_ymode_probs
const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9]
Definition: vp9data.c:87
decode_coeffs_16bpp
static int decode_coeffs_16bpp(VP9TileData *td)
Definition: vp9block.c:1136
mask
static const uint16_t mask[17]
Definition: lzw.c:38
s
#define s(width, name)
Definition: cbs_vp9.c:257
ff_vp9_inter_mode_tree
const int8_t ff_vp9_inter_mode_tree[3][2]
Definition: vp9data.c:214
ff_vp9_scans
const int16_t *const ff_vp9_scans[5][4]
Definition: vp9data.c:600
vp9data.h
AV_WN16A
#define AV_WN16A(p, v)
Definition: intreadwrite.h:534
BS_8x4
@ BS_8x4
Definition: vp9shared.h:88
f
#define f(width, name)
Definition: cbs_vp9.c:255
STORE_COEF
#define STORE_COEF(c, i, v)
vp56.h
if
if(ret)
Definition: filter_design.txt:179
MERGE_CTX
#define MERGE_CTX(step, rd)
VP9Context
Definition: vp9dec.h:91
vp8_rac_get_tree
static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t(*tree)[2], const uint8_t *probs)
Definition: vp56.h:394
TX_8X8
@ TX_8X8
Definition: vp9.h:29
TX_16X16
@ TX_16X16
Definition: vp9.h:30
ff_vp9_filter_lut
enum FilterMode ff_vp9_filter_lut[3]
Definition: vp9data.c:225
DECODE_UV_COEF_LOOP
#define DECODE_UV_COEF_LOOP(step, v)
ff_vp9_segmentation_tree
const int8_t ff_vp9_segmentation_tree[7][2]
Definition: vp9data.c:65
SPLAT_ZERO_YUV
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
ff_vp9_intra_recon_16bpp
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:293
TxfmMode
TxfmMode
Definition: vp9.h:27
vp9.h
DCT_DCT
@ DCT_DCT
Definition: vp9.h:38
ff_vp9_fill_mv
void ff_vp9_fill_mv(VP9TileData *td, VP56mv *mv, int mode, int sb)
Definition: vp9mvs.c:291
BS_8x8
@ BS_8x8
Definition: vp9shared.h:87
AV_RN64A
#define AV_RN64A(p)
Definition: intreadwrite.h:530
TX_4X4
@ TX_4X4
Definition: vp9.h:28
t8
#define t8
Definition: regdef.h:53
val
const char const char void * val
Definition: avisynth_c.h:863
N_BS_SIZES
@ N_BS_SIZES
Definition: vp9shared.h:91
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
SPLAT_CTX
#define SPLAT_CTX(var, val, n)
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
t4
#define t4
Definition: regdef.h:32
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
av_always_inline
#define av_always_inline
Definition: attributes.h:43
uint8_t
uint8_t
Definition: audio_convert.c:194
BlockSize
BlockSize
Definition: vp9shared.h:77
AV_COPY32
#define AV_COPY32(d, s)
Definition: intreadwrite.h:601
decode_coeffs_b_generic
static av_always_inline int decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, int is_tx32x32, int is8bitsperpixel, int bpp, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:805
avcodec.h
AV_RN32A
#define AV_RN32A(p)
Definition: intreadwrite.h:526
ret
ret
Definition: filter_design.txt:187
pred
static const float pred[4]
Definition: siprdata.h:259
VP9mvrefPair
Definition: vp9shared.h:54
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
VP9TileData
Definition: vp9dec.h:157
AV_WN64A
#define AV_WN64A(p, v)
Definition: intreadwrite.h:542
VP56RangeCoder
Definition: vp56.h:85
VP9Filter::mask
uint8_t mask[2][2][8][4]
Definition: vp9dec.h:76
ff_vp9_bwh_tab
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
AV_RN16A
#define AV_RN16A(p)
Definition: intreadwrite.h:522
setctx_2d
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, ptrdiff_t stride, int v)
Definition: vp9block.c:34
ff_vp9_decode_block
void ff_vp9_decode_block(VP9TileData *td, int row, int col, VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9block.c:1263
NEARESTMV
@ NEARESTMV
Definition: vp9shared.h:42
BlockLevel
BlockLevel
Definition: vp9shared.h:70
vp9dec.h
vp56_rac_get_prob
#define vp56_rac_get_prob
Definition: vp56.h:268
CUR_FRAME
#define CUR_FRAME
Definition: vp9shared.h:163
TX_32X32
@ TX_32X32
Definition: vp9.h:31
decode_coeffs_b_8bpp
static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:924
videodsp.h
ff_vp9_intra_recon_8bpp
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:288
h
h
Definition: vp9dsp_template.c:2038
decode_coeffs_8bpp
static int decode_coeffs_8bpp(VP9TileData *td)
Definition: vp9block.c:1131
VP9Filter::level
uint8_t level[8 *8]
Definition: vp9dec.h:74
ff_vp9_scans_nb
const int16_t(*const [5][4] ff_vp9_scans_nb)[2]
Definition: vp9data.c:1157
decode_coeffs_b32_16bpp
static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, int16_t *qmul)
Definition: vp9block.c:954
ff_vp9_inter_recon_16bpp
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:641