vp9.c
/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dsp.h"
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342

enum CompPredMode {
    PRED_SINGLEREF,
    PRED_COMPREF,
    PRED_SWITCHABLE,
};

enum BlockLevel {
    BL_64X64,
    BL_32X32,
    BL_16X16,
    BL_8X8,
};

enum BlockSize {
    BS_64x64,
    BS_64x32,
    BS_32x64,
    BS_32x32,
    BS_32x16,
    BS_16x32,
    BS_16x16,
    BS_16x8,
    BS_8x16,
    BS_8x8,
    BS_8x4,
    BS_4x8,
    BS_4x4,
    N_BS_SIZES,
};

struct VP9mvrefPair {
    VP56mv mv[2];
    int8_t ref[2];
};

struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};

typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;

    int row, row7, col, col7;
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;
} VP9Block;

typedef struct VP9Context {
    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block b;

    // bitstream header
    uint8_t profile;
    uint8_t keyframe, last_keyframe;
    uint8_t invisible, last_invisible;
    uint8_t use_last_frame_mvs;
    uint8_t errorres;
    uint8_t colorspace;
    uint8_t fullrange;
    uint8_t intraonly;
    uint8_t resetctx;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t fixcompref;
    uint8_t refreshctx;
    uint8_t parallelmode;
    uint8_t framectxid;
    uint8_t refidx[3];
    uint8_t signbias[3];
    uint8_t varcompref[2];
    AVFrame *refs[8], *f, *fb[10];

    struct {
        uint8_t level;
        int8_t sharpness;
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter;
    struct {
        uint8_t enabled;
        int8_t mode[2];
        int8_t ref[4];
    } lf_delta;
    uint8_t yac_qi;
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t lossless;
    struct {
        uint8_t enabled;
        uint8_t temporal;
        uint8_t absolute_vals;
        uint8_t update_map;
        struct {
            uint8_t q_enabled;
            uint8_t lf_enabled;
            uint8_t skip_enabled;
            uint8_t ref_enabled;
            uint8_t ref_val;
            int16_t q_val;
            int8_t lf_val;
            int16_t qmul[2][2];
            uint8_t lflvl[4][2];
        } feat[8];
    } segmentation;
    struct {
        unsigned log2_tile_cols, log2_tile_rows;
        unsigned tile_cols, tile_rows;
        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    } tiling;
    unsigned sb_cols, sb_rows, rows, cols;
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
        uint8_t seg[7];
        uint8_t segpred[3];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    uint8_t left_partition_ctx[8], *above_partition_ctx;
    uint8_t left_mode_ctx[16], *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
    uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
    uint8_t left_skip_ctx[8], *above_skip_ctx;
    uint8_t left_txfm_ctx[8], *above_txfm_ctx;
    uint8_t left_segpred_ctx[8], *above_segpred_ctx;
    uint8_t left_intra_ctx[8], *above_intra_ctx;
    uint8_t left_comp_ctx[8], *above_comp_ctx;
    uint8_t left_ref_ctx[8], *above_ref_ctx;
    uint8_t left_filter_ctx[8], *above_filter_ctx;
    VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv[2];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71 * 80];

    // block reconstruction intermediates
    DECLARE_ALIGNED(32, int16_t, block)[4096];
    DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
    uint8_t eob[256];
    uint8_t uveob[2][64];
    VP56mv min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
} VP9Context;

static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
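
// bwh_tab[0] holds each block size in 4x4-block units, bwh_tab[1] in
// 8x8-block units; e.g. BS_64x32 maps to { 16, 8 } and { 8, 4 } respectively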

static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
        return 0;

    ctx->width  = w;
    ctx->height = h;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;
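    // e.g. a 1920x1080 stream gives sb_cols = 30, sb_rows = 17 (64x64
    // superblocks) and cols = 240, rows = 135 (8x8 block units)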

#define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
                                64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
    assign(s->mv[0], struct VP9mvrefPair *, 64 * s->sb_rows);
    assign(s->mv[1], struct VP9mvrefPair *, 64 * s->sb_rows);
#undef assign

    return 0;
}

// for some reason the sign bit is at the end, not the start, of a bit sequence
static int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}

static int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
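
// e.g. with m = 10, v = 0, 1, 2, 3, 4 maps to 10, 9, 11, 8, 12: successive
// values of v alternate below and above the pivot m, so that small v always
// means "close to the original value"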

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in the positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */
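
    /* Worked example (illustrative): for an old probability p = 100, if the
     * first branch below is taken and vp8_rac_get_uint(c, 4) returns 0, then
     * d = 0 and inv_map_table[0] = 7; since p <= 128, the new probability is
     * 1 + inv_recenter_nonneg(7, 99) = 1 + 95 = 96. The four branches code d
     * in the ranges [0,16), [16,32), [32,64) and 64 upwards, so cheaper VLC
     * codes correspond to smaller absolute updates. */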

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}

static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_keyframe  = s->keyframe;
    s->keyframe       = !get_bits1(&s->gb);
    s->last_invisible = s->invisible;
    s->invisible      = !get_bits1(&s->gb);
    s->errorres       = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !s->last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx  = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0]   = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1]   = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2]   = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]] || !s->refs[s->refidx[1]] ||
                !s->refs[s->refidx[2]]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]]->width;
                h = s->refs[s->refidx[0]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]]->width;
                h = s->refs[s->refidx[1]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]]->width;
                h = s->refs[s->refidx[2]]->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                            get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref    = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref    = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref    = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }
    s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid   = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi      = get_bits(&s->gb, 8);
    s->ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                     s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled    = 0;
        s->segmentation.feat[0].lf_enabled   = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled  = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
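    // the loops above bound the tile-count range: log2_tile_cols starts at
    // the smallest value where no tile is wider than 64 sbs, and 'max' is the
    // largest value keeping every tile at least 4 sbs wide; e.g. sb_cols = 30
    // allows log2_tile_cols in [0, 2]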
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows      = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
        s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // fw update)?
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}

static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
        [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
        [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
        [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
        [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
        [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
        [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    };
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col, row7 = b->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;

#define RETURN_DIRECT_MV(mv)                    \
    do {                                        \
        uint32_t m = AV_RN32A(&mv);             \
        if (!idx) {                             \
            AV_WN32A(pmv, m);                   \
            return;                             \
        } else if (mem == INVALID_MV) {         \
            mem = m;                            \
        } else if (m != mem) {                  \
            AV_WN32A(pmv, m);                   \
            return;                             \
        }                                       \
    } while (0)
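
// the RETURN_* macros implement a two-slot candidate search: with idx == 0 the
// first usable mv is returned immediately; with idx == 1 (NEARMV) the first
// candidate is remembered in 'mem' and the next *different* mv is returned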

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }

#define RETURN_MV(mv)                           \
    do {                                        \
        if (sb > 0) {                           \
            VP56mv tmp;                         \
            uint32_t m;                         \
            clamp_mv(&tmp, &mv, s);             \
            m = AV_RN32A(&tmp);                 \
            if (!idx) {                         \
                AV_WN32A(pmv, m);               \
                return;                         \
            } else if (mem == INVALID_MV) {     \
                mem = m;                        \
            } else if (m != mem) {              \
                AV_WN32A(pmv, m);               \
                return;                         \
            }                                   \
        } else {                                \
            uint32_t m = AV_RN32A(&mv);         \
            if (!idx) {                         \
                clamp_mv(pmv, &mv, s);          \
                return;                         \
            } else if (mem == INVALID_MV) {     \
                mem = m;                        \
            } else if (m != mem) {              \
                clamp_mv(pmv, &mv, s);          \
                return;                         \
            }                                   \
        }                                       \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->mv[0][(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->mv[0][row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }

    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];

        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }

#define RETURN_SCALE_MV(mv, scale)              \
    do {                                        \
        if (scale) {                            \
            VP56mv mv_temp = { -mv.x, -mv.y };  \
            RETURN_MV(mv_temp);                 \
        } else {                                \
            RETURN_MV(mv);                      \
        }                                       \
    } while (0)
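
// when a candidate's reference frame has the opposite sign bias (points in the
// other temporal direction), its motion vector is negated before being used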

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];

        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
#undef INVALID_MV
#undef RETURN_MV
#undef RETURN_SCALE_MV
}

static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 8 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
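
// the magnitude returned above is in 1/8-pel units: class 0 covers 1..16,
// class c >= 1 covers (8 << c) + 1 .. 16 << c, assembled from integer bits, a
// 2-bit fraction and the high-precision bit; e.g. c = 1, bits = 1, fp = 2,
// hp = 1 gives n = 16 + 8 + 4 + 1 = 29, i.e. a component of +/- 30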

static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *const b = &s->b;

    if (mode == ZEROMV) {
        memset(mv, 0, sizeof(*mv) * 2);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}
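
// note the rounding in fill_mv() above: when high-precision mvs are disabled
// (or the predicted mv is large), odd components are rounded towards zero so
// the prediction lands on a valid 1/4-pel position before the residual is added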

static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col, row7 = b->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;

    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = s->segmentation.update_map ?
            vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        int pred = 8, x;

        for (y = 0; y < h4; y++)
            for (x = 0; x < w4; x++)
                pred = FFMIN(pred, s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
        av_assert1(pred < 8);
        b->seg_id = pred;

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
        for (y = 0; y < h4; y++)
            memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
                   b->seg_id, w4);
    }

    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }

    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                    vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                        vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
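
        // indices 0-9 are the ten intra modes, 10-13 the four inter modes
        // (NEARESTMV through NEWMV), which is also why the mode counts below
        // are offset by -10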

        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->varcompref[1] &&
                                         refa != s->varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->varcompref[1] &&
                                         refl != s->varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }

        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][b->filter]++;
        } else {
            b->filter = s->filtermode;
        }

        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }
    }

    // FIXME this can probably be optimized
    memset(&s->above_skip_ctx[col], b->skip, w4);
    memset(&s->left_skip_ctx[row7], b->skip, h4);
    memset(&s->above_txfm_ctx[col], b->tx, w4);
    memset(&s->left_txfm_ctx[row7], b->tx, h4);
    memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
    memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
    if (!s->keyframe && !s->intraonly) {
        memset(&s->above_intra_ctx[col], b->intra, w4);
        memset(&s->left_intra_ctx[row7], b->intra, h4);
        memset(&s->above_comp_ctx[col], b->comp, w4);
        memset(&s->left_comp_ctx[row7], b->comp, h4);
        memset(&s->above_mode_ctx[col], b->mode[3], w4);
        memset(&s->left_mode_ctx[row7], b->mode[3], h4);
        if (s->filtermode == FILTER_SWITCHABLE && !b->intra) {
            memset(&s->above_filter_ctx[col], b->filter, w4);
            memset(&s->left_filter_ctx[row7], b->filter, h4);
            b->filter = vp9_filter_lut[b->filter];
        }
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }

        if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
                         // as a direct check in above branches
            int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];

            memset(&s->above_ref_ctx[col], vref, w4);
            memset(&s->left_ref_ctx[row7], vref, h4);
        }
    }

    // FIXME kinda ugly
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] =
                s->mv[0][o + x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] = b->ref[0];
                s->mv[0][o + x].ref[1] = b->ref[1];
                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
                AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] = b->ref[0];
                s->mv[0][o + x].ref[1] = -1;
                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
            }
        }
    }
}

// FIXME remove tx argument, and merge cnt/eob arguments?
static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                           enum TxfmMode tx, unsigned (*cnt)[6][3],
                           unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                           int nnz, const int16_t *scan, const int16_t (*nb)[2],
                           const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; // invalid input; blocks should end with EOB
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            // fill in p[3-10] (model fill) - only once per frame for each pos
            if (!tp[3])
                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);

            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    val = 5 + vp56_rac_get_prob(c, 159);
                } else {
                    val = 7 + (vp56_rac_get_prob(c, 165) << 1) +
                          vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        val = 11 + (vp56_rac_get_prob(c, 173) << 2) +
                              (vp56_rac_get_prob(c, 148) << 1) +
                              vp56_rac_get_prob(c, 140);
                    } else {
                        val = 19 + (vp56_rac_get_prob(c, 176) << 3) +
                              (vp56_rac_get_prob(c, 155) << 2) +
                              (vp56_rac_get_prob(c, 140) << 1) +
                              vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    val = 35 + (vp56_rac_get_prob(c, 180) << 4) +
                          (vp56_rac_get_prob(c, 157) << 3) +
                          (vp56_rac_get_prob(c, 141) << 2) +
                          (vp56_rac_get_prob(c, 134) << 1) +
                          vp56_rac_get_prob(c, 130);
                } else {
                    val = 67 + (vp56_rac_get_prob(c, 254) << 13) +
                          (vp56_rac_get_prob(c, 254) << 12) +
                          (vp56_rac_get_prob(c, 254) << 11) +
                          (vp56_rac_get_prob(c, 252) << 10) +
                          (vp56_rac_get_prob(c, 249) << 9) +
                          (vp56_rac_get_prob(c, 243) << 8) +
                          (vp56_rac_get_prob(c, 230) << 7) +
                          (vp56_rac_get_prob(c, 196) << 6) +
                          (vp56_rac_get_prob(c, 177) << 5) +
                          (vp56_rac_get_prob(c, 153) << 4) +
                          (vp56_rac_get_prob(c, 140) << 3) +
                          (vp56_rac_get_prob(c, 133) << 2) +
                          (vp56_rac_get_prob(c, 130) << 1) +
                          vp56_rac_get_prob(c, 129);
                }
            }
        }
        if (!--band_left)
            band_left = band_counts[++band];
        if (tx == TX_32X32) // FIXME slow
            coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
        else
            coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}
1891 
1892 static int decode_coeffs(AVCodecContext *ctx)
1893 {
1894  VP9Context *s = ctx->priv_data;
1895  VP9Block *const b = &s->b;
1896  int row = b->row, col = b->col;
1897  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1898  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1899  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
1900  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
1901  int end_x = FFMIN(2 * (s->cols - col), w4);
1902  int end_y = FFMIN(2 * (s->rows - row), h4);
1903  int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
1904  int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
1905  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
1906  int tx = 4 * s->lossless + b->tx;
1907  const int16_t **yscans = vp9_scans[tx];
1908  const int16_t (**ynbs)[2] = vp9_scans_nb[tx];
1909  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
1910  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
1911  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
1912  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
1913  static const int16_t band_counts[4][8] = {
1914  { 1, 2, 3, 4, 3, 16 - 13 },
1915  { 1, 2, 3, 4, 11, 64 - 21 },
1916  { 1, 2, 3, 4, 11, 256 - 21 },
1917  { 1, 2, 3, 4, 11, 1024 - 21 },
1918  };
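 // in each row above, the first five entries sum to 13 (4x4) or 21 (larger
 // transforms); the last entry is simply the remaining coefficient count of
 // the transform, so each row totals n_coeffs (16, 64, 256 or 1024)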
1919  const int16_t *y_band_counts = band_counts[b->tx];
1920  const int16_t *uv_band_counts = band_counts[b->uvtx];
1921 
1922  /* y tokens */
1923  if (b->tx > TX_4X4) { // FIXME slow
1924  for (y = 0; y < end_y; y += step1d)
1925  for (x = 1; x < step1d; x++)
1926  l[y] |= l[y + x];
1927  for (x = 0; x < end_x; x += step1d)
1928  for (y = 1; y < step1d; y++)
1929  a[x] |= a[x + y];
1930  }
1931  for (n = 0, y = 0; y < end_y; y += step1d) {
1932  for (x = 0; x < end_x; x += step1d, n += step) {
1933  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
1934  b->bs > BS_8x8 ?
1935  n : 0]];
1936  int nnz = a[x] + l[y];
1937  if ((res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
1938  b->tx, c, e, p, nnz, yscans[txtp],
1939  ynbs[txtp], y_band_counts, qmul[0])) < 0)
1940  return res;
1941  a[x] = l[y] = !!res;
1942  if (b->tx > TX_8X8) {
1943  AV_WN16A(&s->eob[n], res);
1944  } else {
1945  s->eob[n] = res;
1946  }
1947  }
1948  }
1949  if (b->tx > TX_4X4) { // FIXME slow
1950  for (y = 0; y < end_y; y += step1d)
1951  memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
1952  for (x = 0; x < end_x; x += step1d)
1953  memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
1954  }
1955 
1956  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1957  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1958  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1959  w4 >>= 1;
1960  h4 >>= 1;
1961  end_x >>= 1;
1962  end_y >>= 1;
1963  for (pl = 0; pl < 2; pl++) {
1964  a = &s->above_uv_nnz_ctx[pl][col];
1965  l = &s->left_uv_nnz_ctx[pl][row & 7];
1966  if (b->uvtx > TX_4X4) { // FIXME slow
1967  for (y = 0; y < end_y; y += uvstep1d)
1968  for (x = 1; x < uvstep1d; x++)
1969  l[y] |= l[y + x];
1970  for (x = 0; x < end_x; x += uvstep1d)
1971  for (y = 1; y < uvstep1d; y++)
1972  a[x] |= a[x + y];
1973  }
1974  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1975  for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
1976  int nnz = a[x] + l[y];
1977  if ((res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
1978  16 * uvstep, b->uvtx, c, e, p, nnz,
1979  uvscan, uvnb, uv_band_counts,
1980  qmul[1])) < 0)
1981  return res;
1982  a[x] = l[y] = !!res;
1983  if (b->uvtx > TX_8X8) {
1984  AV_WN16A(&s->uveob[pl][n], res);
1985  } else {
1986  s->uveob[pl][n] = res;
1987  }
1988  }
1989  }
1990  if (b->uvtx > TX_4X4) { // FIXME slow
1991  for (y = 0; y < end_y; y += uvstep1d)
1992  memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
1993  for (x = 0; x < end_x; x += uvstep1d)
1994  memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
1995  }
1996  }
1997 
1998  return 0;
1999 }
2000 
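/* Fix up an intra prediction mode based on edge availability and gather the
 * top/left edge pixels into a[]/l[], extending or synthesizing them where
 * neighbours are missing; e.g. per mode_conv below, HOR_PRED with no left
 * edge degrades to DC_129_PRED, and VERT_PRED with no top to DC_127_PRED. */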
2001 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2002  uint8_t *dst_edge, ptrdiff_t stride_edge,
2003  uint8_t *dst_inner, ptrdiff_t stride_inner,
2004  uint8_t *l, int col, int x, int w,
2005  int row, int y, enum TxfmMode tx,
2006  int p)
2007 {
2008  int have_top = row > 0 || y > 0;
2009  int have_left = col > s->tiling.tile_col_start || x > 0;
2010  int have_right = x < w - 1;
2011  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2012  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2013  { DC_127_PRED, VERT_PRED } },
2014  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2015  { HOR_PRED, HOR_PRED } },
2016  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2017  { LEFT_DC_PRED, DC_PRED } },
2018  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2019  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2020  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2021  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2022  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2023  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2024  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2025  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2026  [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2027  { DC_127_PRED, VERT_LEFT_PRED } },
2028  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2029  { HOR_UP_PRED, HOR_UP_PRED } },
2030  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2031  { HOR_PRED, TM_VP8_PRED } },
2032  };
2033  static const struct {
2034  uint8_t needs_left:1;
2035  uint8_t needs_top:1;
2036  uint8_t needs_topleft:1;
2037  uint8_t needs_topright:1;
2038  } edges[N_INTRA_PRED_MODES] = {
2039  [VERT_PRED] = { .needs_top = 1 },
2040  [HOR_PRED] = { .needs_left = 1 },
2041  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2042  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2043  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2044  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2045  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2046  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2047  [HOR_UP_PRED] = { .needs_left = 1 },
2048  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2049  [LEFT_DC_PRED] = { .needs_left = 1 },
2050  [TOP_DC_PRED] = { .needs_top = 1 },
2051  [DC_128_PRED] = { 0 },
2052  [DC_127_PRED] = { 0 },
2053  [DC_129_PRED] = { 0 }
2054  };
2055 
2056  av_assert2(mode >= 0 && mode < 10);
2057  mode = mode_conv[mode][have_left][have_top];
2058  if (edges[mode].needs_top) {
2059  uint8_t *top, *topleft;
2060  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2061  int n_px_need_tr = 0;
2062 
2063  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2064  n_px_need_tr = 4;
2065 
2066  // if top of sb64-row, use s->intra_pred_data[] instead of
2067  // dst[-stride] for intra prediction (it contains pre- instead of
2068  // post-loopfilter data)
2069  if (have_top) {
2070  top = !(row & 7) && !y ?
2071  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2072  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2073  if (have_left)
2074  topleft = !(row & 7) && !y ?
2075  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2076  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2077  &dst_inner[-stride_inner];
2078  }
2079 
2080  if (have_top &&
2081  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2082  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2083  n_px_need + n_px_need_tr <= n_px_have) {
2084  *a = top;
2085  } else {
2086  if (have_top) {
2087  if (n_px_need <= n_px_have) {
2088  memcpy(*a, top, n_px_need);
2089  } else {
2090  memcpy(*a, top, n_px_have);
2091  memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2092  n_px_need - n_px_have);
2093  }
2094  } else {
2095  memset(*a, 127, n_px_need);
2096  }
2097  if (edges[mode].needs_topleft) {
2098  if (have_left && have_top) {
2099  (*a)[-1] = topleft[-1];
2100  } else {
2101  (*a)[-1] = have_top ? 129 : 127;
2102  }
2103  }
2104  if (tx == TX_4X4 && edges[mode].needs_topright) {
2105  if (have_top && have_right &&
2106  n_px_need + n_px_need_tr <= n_px_have) {
2107  memcpy(&(*a)[4], &top[4], 4);
2108  } else {
2109  memset(&(*a)[4], (*a)[3], 4);
2110  }
2111  }
2112  }
2113  }
2114  if (edges[mode].needs_left) {
2115  if (have_left) {
2116  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2117  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2118  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2119 
2120  if (n_px_need <= n_px_have) {
2121  for (i = 0; i < n_px_need; i++)
2122  l[i] = dst[i * stride - 1];
2123  } else {
2124  for (i = 0; i < n_px_have; i++)
2125  l[i] = dst[i * stride - 1];
2126  memset(&l[i], l[i - 1], n_px_need - n_px_have);
2127  }
2128  } else {
2129  memset(l, 129, 4 << tx);
2130  }
2131  }
2132 
2133  return mode;
2134 }
2135 
2136 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2137 {
2138  VP9Context *s = ctx->priv_data;
2139  VP9Block *const b = &s->b;
2140  int row = b->row, col = b->col;
2141  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2142  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2143  int end_x = FFMIN(2 * (s->cols - col), w4);
2144  int end_y = FFMIN(2 * (s->rows - row), h4);
2145  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2146  int uvstep1d = 1 << b->uvtx, p;
2147  uint8_t *dst = b->dst[0], *dst_r = s->f->data[0] + y_off;
2148 
2149  for (n = 0, y = 0; y < end_y; y += step1d) {
2150  uint8_t *ptr = dst, *ptr_r = dst_r;
2151  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2152  ptr_r += 4 * step1d, n += step) {
2153  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2154  y * 2 + x : 0];
2155  LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2156  uint8_t *a = &a_buf[16], l[32];
2157  enum TxfmType txtp = vp9_intra_txfm_type[mode];
2158  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2159 
2160  mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[0],
2161  ptr, b->y_stride, l,
2162  col, x, w4, row, y, b->tx, 0);
2163  s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
2164  if (eob)
2165  s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
2166  s->block + 16 * n, eob);
2167  }
2168  dst_r += 4 * s->f->linesize[0] * step1d;
2169  dst += 4 * b->y_stride * step1d;
2170  }
2171 
2172  // U/V
2173  h4 >>= 1;
2174  w4 >>= 1;
2175  end_x >>= 1;
2176  end_y >>= 1;
2177  step = 1 << (b->uvtx * 2);
2178  for (p = 0; p < 2; p++) {
2179  dst = b->dst[1 + p];
2180  dst_r = s->f->data[1 + p] + uv_off;
2181  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2182  uint8_t *ptr = dst, *ptr_r = dst_r;
2183  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2184  ptr_r += 4 * uvstep1d, n += step) {
2185  int mode = b->uvmode;
2186  LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2187  uint8_t *a = &a_buf[16], l[32];
2188  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2189 
2190  mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[1],
2191  ptr, b->uv_stride, l,
2192  col, x, w4, row, y, b->uvtx, p + 1);
2193  s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
2194  if (eob)
2195  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2196  s->uvblock[p] + 16 * n, eob);
2197  }
2198  dst_r += 4 * uvstep1d * s->f->linesize[1];
2199  dst += 4 * uvstep1d * b->uv_stride;
2200  }
2201  }
2202 }
2203 
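/* Motion compensation helpers. mv is in 1/8-pel units for luma and
 * effectively 1/16-pel for chroma (half-resolution planes). The 8-tap
 * filters read 3 pixels before and 4 pixels after the block, so when the
 * reference window would cross the frame edge it is first copied into
 * edge_emu_buffer (stride 80: up to 64 px of block plus 7 taps of overread,
 * padded) with the border pixels replicated. */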
2204 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2205  uint8_t *dst, ptrdiff_t dst_stride,
2206  const uint8_t *ref, ptrdiff_t ref_stride,
2207  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2208  int bw, int bh, int w, int h)
2209 {
2210  int mx = mv->x, my = mv->y;
2211 
2212  y += my >> 3;
2213  x += mx >> 3;
2214  ref += y * ref_stride + x;
2215  mx &= 7;
2216  my &= 7;
2217  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2218  if (x < !!mx * 3 || y < !!my * 3 ||
2219  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2220  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2221  ref - !!my * 3 * ref_stride - !!mx * 3,
2222  ref_stride,
2223  bw + !!mx * 7, bh + !!my * 7,
2224  x - !!mx * 3, y - !!my * 3, w, h);
2225  ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2226  ref_stride = 80;
2227  }
2228  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2229 }
2230 
2231 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2232  uint8_t *dst_u, uint8_t *dst_v,
2233  ptrdiff_t dst_stride,
2234  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2235  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2236  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2237  int bw, int bh, int w, int h)
2238 {
2239  int mx = mv->x, my = mv->y;
2240 
2241  y += my >> 4;
2242  x += mx >> 4;
2243  ref_u += y * src_stride_u + x;
2244  ref_v += y * src_stride_v + x;
2245  mx &= 15;
2246  my &= 15;
2247  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2248  if (x < !!mx * 3 || y < !!my * 3 ||
2249  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2250  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2251  ref_u - !!my * 3 * src_stride_u - !!mx * 3, src_stride_u,
2252  bw + !!mx * 7, bh + !!my * 7,
2253  x - !!mx * 3, y - !!my * 3, w, h);
2254  ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2255  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2256 
2257  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2258  ref_v - !!my * 3 * src_stride_v - !!mx * 3, src_stride_v,
2259  bw + !!mx * 7, bh + !!my * 7,
2260  x - !!mx * 3, y - !!my * 3, w, h);
2261  ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2262  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2263  } else {
2264  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2265  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2266  }
2267 }
2268 
2269 static void inter_recon(AVCodecContext *ctx)
2270 {
2271  static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2272  { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2273  { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2274  };
2275  VP9Context *s = ctx->priv_data;
2276  VP9Block *const b = &s->b;
2277  int row = b->row, col = b->col;
2278  AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
2279  AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
2280  int w = ctx->width, h = ctx->height;
2281  ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;
2282 
2283  // y inter pred
2284  if (b->bs > BS_8x8) {
2285  if (b->bs == BS_8x4) {
2286  mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
2287  ref1->data[0], ref1->linesize[0],
2288  row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
2289  mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2290  b->dst[0] + 4 * ls_y, ls_y,
2291  ref1->data[0], ref1->linesize[0],
2292  (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
2293 
2294  if (b->comp) {
2295  mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
2296  ref2->data[0], ref2->linesize[0],
2297  row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
2298  mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2299  b->dst[0] + 4 * ls_y, ls_y,
2300  ref2->data[0], ref2->linesize[0],
2301  (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
2302  }
2303  } else if (b->bs == BS_4x8) {
2304  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2305  ref1->data[0], ref1->linesize[0],
2306  row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
2307  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2308  ref1->data[0], ref1->linesize[0],
2309  row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
2310 
2311  if (b->comp) {
2312  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2313  ref2->data[0], ref2->linesize[0],
2314  row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
2315  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2316  ref2->data[0], ref2->linesize[0],
2317  row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
2318  }
2319  } else {
2320  av_assert2(b->bs == BS_4x4);
2321 
2322  // FIXME if two horizontally adjacent blocks have the same MV,
2323  // do a w8 instead of a w4 call
2324  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2325  ref1->data[0], ref1->linesize[0],
2326  row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
2327  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2328  ref1->data[0], ref1->linesize[0],
2329  row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
2330  mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2331  b->dst[0] + 4 * ls_y, ls_y,
2332  ref1->data[0], ref1->linesize[0],
2333  (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
2334  mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2335  b->dst[0] + 4 * ls_y + 4, ls_y,
2336  ref1->data[0], ref1->linesize[0],
2337  (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
2338 
2339  if (b->comp) {
2340  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2341  ref2->data[0], ref2->linesize[0],
2342  row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
2343  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2344  ref2->data[0], ref2->linesize[0],
2345  row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
2346  mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2347  b->dst[0] + 4 * ls_y, ls_y,
2348  ref2->data[0], ref2->linesize[0],
2349  (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
2350  mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2351  b->dst[0] + 4 * ls_y + 4, ls_y,
2352  ref2->data[0], ref2->linesize[0],
2353  (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
2354  }
2355  }
2356  } else {
2357  int bwl = bwlog_tab[0][b->bs];
2358  int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2359 
2360  mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
2361  ref1->data[0], ref1->linesize[0],
2362  row << 3, col << 3, &b->mv[0][0], bw, bh, w, h);
2363 
2364  if (b->comp)
2365  mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
2366  ref2->data[0], ref2->linesize[0],
2367  row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
2368  }
2369 
2370  // uv inter pred
2371  {
2372  int bwl = bwlog_tab[1][b->bs];
2373  int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2374  VP56mv mvuv;
2375 
2376  w = (w + 1) >> 1;
2377  h = (h + 1) >> 1;
2378  if (b->bs > BS_8x8) {
2379  mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2380  mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2381  } else {
2382  mvuv = b->mv[0][0];
2383  }
2384 
2385  mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2386  b->dst[1], b->dst[2], ls_uv,
2387  ref1->data[1], ref1->linesize[1],
2388  ref1->data[2], ref1->linesize[2],
2389  row << 2, col << 2, &mvuv, bw, bh, w, h);
2390 
2391  if (b->comp) {
2392  if (b->bs > BS_8x8) {
2393  mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2394  mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2395  } else {
2396  mvuv = b->mv[0][1];
2397  }
2398  mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2399  b->dst[1], b->dst[2], ls_uv,
2400  ref2->data[1], ref2->linesize[1],
2401  ref2->data[2], ref2->linesize[2],
2402  row << 2, col << 2, &mvuv, bw, bh, w, h);
2403  }
2404  }
2405 
2406  if (!b->skip) {
2407  /* mostly copied from intra_recon() */
2408 
2409  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2410  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2411  int end_x = FFMIN(2 * (s->cols - col), w4);
2412  int end_y = FFMIN(2 * (s->rows - row), h4);
2413  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2414  int uvstep1d = 1 << b->uvtx, p;
2415  uint8_t *dst = b->dst[0];
2416 
2417  // y itxfm add
2418  for (n = 0, y = 0; y < end_y; y += step1d) {
2419  uint8_t *ptr = dst;
2420  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2421  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2422 
2423  if (eob)
2424  s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
2425  s->block + 16 * n, eob);
2426  }
2427  dst += 4 * b->y_stride * step1d;
2428  }
2429 
2430  // uv itxfm add
2431  h4 >>= 1;
2432  w4 >>= 1;
2433  end_x >>= 1;
2434  end_y >>= 1;
2435  step = 1 << (b->uvtx * 2);
2436  for (p = 0; p < 2; p++) {
2437  dst = b->dst[p + 1];
2438  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2439  uint8_t *ptr = dst;
2440  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2441  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2442 
2443  if (eob)
2444  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2445  s->uvblock[p] + 16 * n, eob);
2446  }
2447  dst += 4 * uvstep1d * b->uv_stride;
2448  }
2449  }
2450  }
2451 }
2452 
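/* Build the loopfilter masks for one block: mask[plane][dir][row][size] gets
 * one bit per 8px (Y) or subsampled (UV) column whose edge needs the 16-,
 * 8-, 4-wide or inner-4 filter. A worked example of the column mask math
 * used below: with col_and_7 == 2 and w == 4, t = 1 << 2 = 4 and
 * m_col = (t << 4) - t = 0x3c, i.e. bits 2..5 are set. */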
2453 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2454  int row_and_7, int col_and_7,
2455  int w, int h, int col_end, int row_end,
2456  enum TxfmMode tx, int skip_inter)
2457 {
2458  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2459  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2460  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2461  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2462 
2463  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2464  // edges. This means that for UV, we work on two subsampled blocks at
2465  // a time, and we only use the topleft block's mode information to set
2466  // things like block strength. Thus, for any block size smaller than
2467  // 16x16, ignore the odd portion of the block.
2468  if (tx == TX_4X4 && is_uv) {
2469  if (h == 1) {
2470  if (row_and_7 & 1)
2471  return;
2472  if (!row_end)
2473  h += 1;
2474  }
2475  if (w == 1) {
2476  if (col_and_7 & 1)
2477  return;
2478  if (!col_end)
2479  w += 1;
2480  }
2481  }
2482 
2483  if (tx == TX_4X4 && !skip_inter) {
2484  int t = 1 << col_and_7, m_col = (t << w) - t, y;
2485  int m_col_odd = (t << (w - 1)) - t;
2486 
2487  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2488  if (is_uv) {
2489  int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2490 
2491  for (y = row_and_7; y < h + row_and_7; y++) {
2492  int col_mask_id = 2 - !(y & 7);
2493 
2494  lflvl->mask[is_uv][0][y][1] |= m_row_8;
2495  lflvl->mask[is_uv][0][y][2] |= m_row_4;
2496  // for odd lines, if the odd col is not being filtered,
2497  // skip odd row also:
2498  // .---. <-- a
2499  // | |
2500  // |___| <-- b
2501  // ^ ^
2502  // c d
2503  //
2504  // if a/c are even row/col and b/d are odd, and d is skipped,
2505  // e.g. right edge of size-66x66.webm, then skip b also (bug)
2506  if ((col_end & 1) && (y & 1)) {
2507  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2508  } else {
2509  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2510  }
2511  }
2512  } else {
2513  int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2514 
2515  for (y = row_and_7; y < h + row_and_7; y++) {
2516  int col_mask_id = 2 - !(y & 3);
2517 
2518  lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2519  lflvl->mask[is_uv][0][y][2] |= m_row_4;
2520  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2521  lflvl->mask[is_uv][0][y][3] |= m_col;
2522  lflvl->mask[is_uv][1][y][3] |= m_col;
2523  }
2524  }
2525  } else {
2526  int y, t = 1 << col_and_7, m_col = (t << w) - t;
2527 
2528  if (!skip_inter) {
2529  int mask_id = (tx == TX_8X8);
2530  int l2 = tx + is_uv - 1, step1d = 1 << l2;
2531  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2532  int m_row = m_col & masks[l2];
2533 
2534  // at odd UV col/row tx16/tx32 loopfilter edges, force the 8-wide
2535  // loopfilter to prevent going off the visible edge.
2536  if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2537  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2538  int m_row_8 = m_row - m_row_16;
2539 
2540  for (y = row_and_7; y < h + row_and_7; y++) {
2541  lflvl->mask[is_uv][0][y][0] |= m_row_16;
2542  lflvl->mask[is_uv][0][y][1] |= m_row_8;
2543  }
2544  } else {
2545  for (y = row_and_7; y < h + row_and_7; y++)
2546  lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2547  }
2548 
2549  if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2550  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2551  lflvl->mask[is_uv][1][y][0] |= m_col;
2552  if (y - row_and_7 == h - 1)
2553  lflvl->mask[is_uv][1][y][1] |= m_col;
2554  } else {
2555  for (y = row_and_7; y < h + row_and_7; y += step1d)
2556  lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2557  }
2558  } else if (tx != TX_4X4) {
2559  int mask_id;
2560 
2561  mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2562  lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2563  mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2564  for (y = row_and_7; y < h + row_and_7; y++)
2565  lflvl->mask[is_uv][0][y][mask_id] |= t;
2566  } else if (is_uv) {
2567  int t8 = t & 0x01, t4 = t - t8;
2568 
2569  for (y = row_and_7; y < h + row_and_7; y++) {
2570  lflvl->mask[is_uv][0][y][2] |= t4;
2571  lflvl->mask[is_uv][0][y][1] |= t8;
2572  }
2573  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2574  } else {
2575  int t8 = t & 0x11, t4 = t - t8;
2576 
2577  for (y = row_and_7; y < h + row_and_7; y++) {
2578  lflvl->mask[is_uv][0][y][2] |= t4;
2579  lflvl->mask[is_uv][0][y][1] |= t8;
2580  }
2581  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2582  }
2583  }
2584 }
2585 
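/* Decode a single block: parse mode/MVs, decode coefficients (or clear the
 * nnz contexts on skip), reconstruct intra or inter pixels (into temporary
 * buffers if the block overhangs the frame edge), then record the
 * loopfilter level and edge masks for this superblock. */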
2586 static int decode_b(AVCodecContext *ctx, int row, int col,
2587  struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2588  enum BlockLevel bl, enum BlockPartition bp)
2589 {
2590  VP9Context *s = ctx->priv_data;
2591  VP9Block *const b = &s->b;
2592  enum BlockSize bs = bl * 3 + bp;
2593  int res, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2594  int emu[2];
2595 
2596  b->row = row;
2597  b->row7 = row & 7;
2598  b->col = col;
2599  b->col7 = col & 7;
2600  s->min_mv.x = -(128 + col * 64);
2601  s->min_mv.y = -(128 + row * 64);
2602  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2603  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2604  b->bs = bs;
2605  decode_mode(ctx);
2606  b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2607 
2608  if (!b->skip) {
2609  if ((res = decode_coeffs(ctx)) < 0)
2610  return res;
2611  } else {
2612  int pl;
2613 
2614  memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2615  memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2616  for (pl = 0; pl < 2; pl++) {
2617  memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2618  memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
2619  }
2620  }
2621 
2622  // use emulated overhangs if the stride of the target buffer can't hold
2623  // them; this allows us to support emu-edge and so on even if we have
2624  // large block overhangs
2625  emu[0] = (col + w4) * 8 > s->f->linesize[0] ||
2626  (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2627  emu[1] = (col + w4) * 4 > s->f->linesize[1] ||
2628  (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2629  if (emu[0]) {
2630  b->dst[0] = s->tmp_y;
2631  b->y_stride = 64;
2632  } else {
2633  b->dst[0] = s->f->data[0] + yoff;
2634  b->y_stride = s->f->linesize[0];
2635  }
2636  if (emu[1]) {
2637  b->dst[1] = s->tmp_uv[0];
2638  b->dst[2] = s->tmp_uv[1];
2639  b->uv_stride = 32;
2640  } else {
2641  b->dst[1] = s->f->data[1] + uvoff;
2642  b->dst[2] = s->f->data[2] + uvoff;
2643  b->uv_stride = s->f->linesize[1];
2644  }
2645  if (b->intra) {
2646  intra_recon(ctx, yoff, uvoff);
2647  } else {
2648  inter_recon(ctx);
2649  }
2650  if (emu[0]) {
2651  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2652 
2653  for (n = 0; o < w; n++) {
2654  int bw = 64 >> n;
2655 
2656  av_assert2(n <= 4);
2657  if (w & bw) {
2658  s->dsp.mc[n][0][0][0][0](s->f->data[0] + yoff + o, s->f->linesize[0],
2659  s->tmp_y + o, 64, h, 0, 0);
2660  o += bw;
2661  }
2662  }
2663  }
2664  if (emu[1]) {
2665  int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2666 
2667  for (n = 1; o < w; n++) {
2668  int bw = 64 >> n;
2669 
2670  av_assert2(n <= 4);
2671  if (w & bw) {
2672  s->dsp.mc[n][0][0][0][0](s->f->data[1] + uvoff + o, s->f->linesize[1],
2673  s->tmp_uv[0] + o, 32, h, 0, 0);
2674  s->dsp.mc[n][0][0][0][0](s->f->data[2] + uvoff + o, s->f->linesize[2],
2675  s->tmp_uv[1] + o, 32, h, 0, 0);
2676  o += bw;
2677  }
2678  }
2679  }
2680 
2681  // pick filter level and find edges to apply filter to
2682  if (s->filter.level &&
2683  (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2684  [b->mode[3] != ZEROMV]) > 0) {
2685  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2686  int skip_inter = !b->intra && b->skip;
2687 
2688  for (y = 0; y < h4; y++)
2689  memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2690  mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2691  mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2692  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2693  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2694  b->uvtx, skip_inter);
2695 
2696  if (!s->filter.lim_lut[lvl]) {
2697  int sharp = s->filter.sharpness;
2698  int limit = lvl;
2699 
2700  if (sharp > 0) {
2701  limit >>= (sharp + 3) >> 2;
2702  limit = FFMIN(limit, 9 - sharp);
2703  }
2704  limit = FFMAX(limit, 1);
2705 
2706  s->filter.lim_lut[lvl] = limit;
2707  s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
2708  }
2709  }
2710 
2711  return 0;
2712 }
2713 
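/* Recursively decode a superblock. Near the right/bottom frame edge the
 * partition syntax is constrained: if the bottom half of the block is
 * off-frame, only split-vs-horizontal can be coded (one bit, p[1]); if the
 * right half is off-frame, only split-vs-vertical (p[2]); if both are,
 * split is implied and no bit is read. */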
2714 static int decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2715  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2716 {
2717  VP9Context *s = ctx->priv_data;
2718  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2719  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1), res;
2720  const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2721  s->prob.p.partition[bl][c];
2722  enum BlockPartition bp;
2723  ptrdiff_t hbs = 4 >> bl;
2724 
2725  if (bl == BL_8X8) {
2726  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2727  res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2728  } else if (col + hbs < s->cols) {
2729  if (row + hbs < s->rows) {
2730  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2731  switch (bp) {
2732  case PARTITION_NONE:
2733  res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2734  break;
2735  case PARTITION_H:
2736  if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2737  yoff += hbs * 8 * s->f->linesize[0];
2738  uvoff += hbs * 4 * s->f->linesize[1];
2739  res = decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
2740  }
2741  break;
2742  case PARTITION_V:
2743  if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2744  yoff += hbs * 8;
2745  uvoff += hbs * 4;
2746  res = decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2747  }
2748  break;
2749  case PARTITION_SPLIT:
2750  if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2751  if (!(res = decode_sb(ctx, row, col + hbs, lflvl,
2752  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1))) {
2753  yoff += hbs * 8 * s->f->linesize[0];
2754  uvoff += hbs * 4 * s->f->linesize[1];
2755  if (!(res = decode_sb(ctx, row + hbs, col, lflvl,
2756  yoff, uvoff, bl + 1)))
2757  res = decode_sb(ctx, row + hbs, col + hbs, lflvl,
2758  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2759  }
2760  }
2761  break;
2762  }
2763  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2764  bp = PARTITION_SPLIT;
2765  if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1)))
2766  res = decode_sb(ctx, row, col + hbs, lflvl,
2767  yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2768  } else {
2769  bp = PARTITION_H;
2770  res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2771  }
2772  } else if (row + hbs < s->rows) {
2773  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2774  bp = PARTITION_SPLIT;
2775  if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2776  yoff += hbs * 8 * s->f->linesize[0];
2777  uvoff += hbs * 4 * s->f->linesize[1];
2778  res = decode_sb(ctx, row + hbs, col, lflvl,
2779  yoff, uvoff, bl + 1);
2780  }
2781  } else {
2782  bp = PARTITION_V;
2783  res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2784  }
2785  } else {
2786  bp = PARTITION_SPLIT;
2787  res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2788  }
2789  s->counts.partition[bl][c][bp]++;
2790 
2791  return res;
2792 }
2793 
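/* Apply the loopfilter to one superblock using the masks built in
 * mask_edges(): for each mask bit, L holds the filter level, H its high
 * nibble (the hev threshold), and E/I the mblim/lim lookups. Two adjacent
 * edges with independent levels are merged into a single mix2 call by
 * packing the second set of parameters into bits 8..15. */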
2794 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2795  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2796 {
2797  VP9Context *s = ctx->priv_data;
2798  uint8_t *dst = s->f->data[0] + yoff, *lvl = lflvl->level;
2799  ptrdiff_t ls_y = s->f->linesize[0], ls_uv = s->f->linesize[1];
2800  int y, x, p;
2801 
2802  // FIXME how far can we interleave the v/h loopfilter calls? E.g.
2803  // if you think of them as acting on a 8x8 block max, we can interleave
2804  // each v/h within the single x loop, but that only works if we work on
2805  // 8 pixel blocks, and we won't always do that (we want at least 16px
2806  // to use SSE2 optimizations, perhaps 32 for AVX2)
2807 
2808  // filter edges between columns, Y plane (e.g. block1 | block2)
2809  for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
2810  uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2811  uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
2812  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2813  unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2814  unsigned hm = hm1 | hm2 | hm13 | hm23;
2815 
2816  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
2817  if (hm1 & x) {
2818  int L = *l, H = L >> 4;
2819  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2820 
2821  if (col || x > 1) {
2822  if (hmask1[0] & x) {
2823  if (hmask2[0] & x) {
2824  av_assert2(l[8] == L);
2825  s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
2826  } else {
2827  s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
2828  }
2829  } else if (hm2 & x) {
2830  L = l[8];
2831  H |= (L >> 4) << 8;
2832  E |= s->filter.mblim_lut[L] << 8;
2833  I |= s->filter.lim_lut[L] << 8;
2834  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2835  [!!(hmask2[1] & x)]
2836  [0](ptr, ls_y, E, I, H);
2837  } else {
2838  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2839  [0](ptr, ls_y, E, I, H);
2840  }
2841  }
2842  } else if (hm2 & x) {
2843  int L = l[8], H = L >> 4;
2844  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2845 
2846  if (col || x > 1) {
2847  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2848  [0](ptr + 8 * ls_y, ls_y, E, I, H);
2849  }
2850  }
2851  if (hm13 & x) {
2852  int L = *l, H = L >> 4;
2853  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2854 
2855  if (hm23 & x) {
2856  L = l[8];
2857  H |= (L >> 4) << 8;
2858  E |= s->filter.mblim_lut[L] << 8;
2859  I |= s->filter.lim_lut[L] << 8;
2860  s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
2861  } else {
2862  s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
2863  }
2864  } else if (hm23 & x) {
2865  int L = l[8], H = L >> 4;
2866  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2867 
2868  s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
2869  }
2870  }
2871  }
2872 
2873  // block1
2874  // filter edges between rows, Y plane (e.g. ------)
2875  // block2
2876  dst = s->f->data[0] + yoff;
2877  lvl = lflvl->level;
2878  for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
2879  uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
2880  unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
2881 
2882  for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
2883  if (row || y) {
2884  if (vm & x) {
2885  int L = *l, H = L >> 4;
2886  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2887 
2888  if (vmask[0] & x) {
2889  if (vmask[0] & (x << 1)) {
2890  av_assert2(l[1] == L);
2891  s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
2892  } else {
2893  s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
2894  }
2895  } else if (vm & (x << 1)) {
2896  L = l[1];
2897  H |= (L >> 4) << 8;
2898  E |= s->filter.mblim_lut[L] << 8;
2899  I |= s->filter.lim_lut[L] << 8;
2900  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
2901  [!!(vmask[1] & (x << 1))]
2902  [1](ptr, ls_y, E, I, H);
2903  } else {
2904  s->dsp.loop_filter_8[!!(vmask[1] & x)]
2905  [1](ptr, ls_y, E, I, H);
2906  }
2907  } else if (vm & (x << 1)) {
2908  int L = l[1], H = L >> 4;
2909  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2910 
2911  s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
2912  [1](ptr + 8, ls_y, E, I, H);
2913  }
2914  }
2915  if (vm3 & x) {
2916  int L = *l, H = L >> 4;
2917  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2918 
2919  if (vm3 & (x << 1)) {
2920  L = l[1];
2921  H |= (L >> 4) << 8;
2922  E |= s->filter.mblim_lut[L] << 8;
2923  I |= s->filter.lim_lut[L] << 8;
2924  s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
2925  } else {
2926  s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
2927  }
2928  } else if (vm3 & (x << 1)) {
2929  int L = l[1], H = L >> 4;
2930  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2931 
2932  s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
2933  }
2934  }
2935  }
2936 
2937  // same principle but for U/V planes
2938  for (p = 0; p < 2; p++) {
2939  lvl = lflvl->level;
2940  dst = s->f->data[1 + p] + uvoff;
2941  for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
2942  uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
2943  uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
2944  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
2945  unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
2946 
2947  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
2948  if (col || x > 1) {
2949  if (hm1 & x) {
2950  int L = *l, H = L >> 4;
2951  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2952 
2953  if (hmask1[0] & x) {
2954  if (hmask2[0] & x) {
2955  av_assert2(l[16] == L);
2956  s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
2957  } else {
2958  s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
2959  }
2960  } else if (hm2 & x) {
2961  L = l[16];
2962  H |= (L >> 4) << 8;
2963  E |= s->filter.mblim_lut[L] << 8;
2964  I |= s->filter.lim_lut[L] << 8;
2965  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2966  [!!(hmask2[1] & x)]
2967  [0](ptr, ls_uv, E, I, H);
2968  } else {
2969  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2970  [0](ptr, ls_uv, E, I, H);
2971  }
2972  } else if (hm2 & x) {
2973  int L = l[16], H = L >> 4;
2974  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2975 
2976  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2977  [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
2978  }
2979  }
2980  if (x & 0xAA)
2981  l += 2;
2982  }
2983  }
2984  lvl = lflvl->level;
2985  dst = s->f->data[1 + p] + uvoff;
2986  for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
2987  uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
2988  unsigned vm = vmask[0] | vmask[1] | vmask[2];
2989 
2990  for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
2991  if (row || y) {
2992  if (vm & x) {
2993  int L = *l, H = L >> 4;
2994  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2995 
2996  if (vmask[0] & x) {
2997  if (vmask[0] & (x << 2)) {
2998  av_assert2(l[2] == L);
2999  s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3000  } else {
3001  s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3002  }
3003  } else if (vm & (x << 2)) {
3004  L = l[2];
3005  H |= (L >> 4) << 8;
3006  E |= s->filter.mblim_lut[L] << 8;
3007  I |= s->filter.lim_lut[L] << 8;
3008  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3009  [!!(vmask[1] & (x << 2))]
3010  [1](ptr, ls_uv, E, I, H);
3011  } else {
3012  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3013  [1](ptr, ls_uv, E, I, H);
3014  }
3015  } else if (vm & (x << 2)) {
3016  int L = l[2], H = L >> 4;
3017  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3018 
3019  s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3020  [1](ptr + 8, ls_uv, E, I, H);
3021  }
3022  }
3023  }
3024  if (y & 1)
3025  lvl += 16;
3026  }
3027  }
3028 }
3029 
3030 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3031 {
3032  int sb_start = ( idx * n) >> log2_n;
3033  int sb_end = ((idx + 1) * n) >> log2_n;
3034  *start = FFMIN(sb_start, n) << 3;
3035  *end = FFMIN(sb_end, n) << 3;
3036 }
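// e.g. with log2_n == 2 (4 tiles) and n == 9 superblock columns, tile 1
// spans sb columns (9 * 1) >> 2 = 2 to (9 * 2) >> 2 = 4, i.e. *start = 16
// and *end = 32 in 8px block units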
3037 
3038 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3039  int max_count, int update_factor)
3040 {
3041  unsigned ct = ct0 + ct1, p2, p1;
3042 
3043  if (!ct)
3044  return;
3045 
3046  p1 = *p;
3047  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3048  p2 = av_clip(p2, 1, 255);
3049  ct = FFMIN(ct, max_count);
3050  update_factor = FASTDIV(update_factor * ct, max_count);
3051 
3052  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3053  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3054 }
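// worked example (values chosen for illustration): ct0 = 20, ct1 = 10,
// *p = 128, max_count = 20, update_factor = 128 gives
// p2 = (20 * 256 + 15) / 30 = 171; ct is clamped to 20, keeping the factor
// at 128, so *p becomes 128 + (((171 - 128) * 128 + 128) >> 8) = 150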
3055 
3056 static void adapt_probs(VP9Context *s)
3057 {
3058  int i, j, k, l, m;
3059  prob_context *p = &s->prob_ctx[s->framectxid].p;
3060  int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3061 
3062  // coefficients
3063  for (i = 0; i < 4; i++)
3064  for (j = 0; j < 2; j++)
3065  for (k = 0; k < 2; k++)
3066  for (l = 0; l < 6; l++)
3067  for (m = 0; m < 6; m++) {
3068  uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3069  unsigned *e = s->counts.eob[i][j][k][l][m];
3070  unsigned *c = s->counts.coef[i][j][k][l][m];
3071 
3072  if (l == 0 && m >= 3) // dc only has 3 pt
3073  break;
3074 
3075  adapt_prob(&pp[0], e[0], e[1], 24, uf);
3076  adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3077  adapt_prob(&pp[2], c[1], c[2], 24, uf);
3078  }
3079 
3080  if (s->keyframe || s->intraonly) {
3081  memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3082  memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3083  memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3084  memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3085  return;
3086  }
3087 
3088  // skip flag
3089  for (i = 0; i < 3; i++)
3090  adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3091 
3092  // intra/inter flag
3093  for (i = 0; i < 4; i++)
3094  adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3095 
3096  // comppred flag
3097  if (s->comppredmode == PRED_SWITCHABLE) {
3098  for (i = 0; i < 5; i++)
3099  adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3100  }
3101 
3102  // reference frames
3103  if (s->comppredmode != PRED_SINGLEREF) {
3104  for (i = 0; i < 5; i++)
3105  adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3106  s->counts.comp_ref[i][1], 20, 128);
3107  }
3108 
3109  if (s->comppredmode != PRED_COMPREF) {
3110  for (i = 0; i < 5; i++) {
3111  uint8_t *pp = p->single_ref[i];
3112  unsigned (*c)[2] = s->counts.single_ref[i];
3113 
3114  adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3115  adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3116  }
3117  }
3118 
3119  // block partitioning
3120  for (i = 0; i < 4; i++)
3121  for (j = 0; j < 4; j++) {
3122  uint8_t *pp = p->partition[i][j];
3123  unsigned *c = s->counts.partition[i][j];
3124 
3125  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3126  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3127  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3128  }
3129 
3130  // tx size
3131  if (s->txfmmode == TX_SWITCHABLE) {
3132  for (i = 0; i < 2; i++) {
3133  unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3134 
3135  adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3136  adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3137  adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3138  adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3139  adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3140  adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3141  }
3142  }
3143 
3144  // interpolation filter
3145  if (s->filtermode == FILTER_SWITCHABLE) {
3146  for (i = 0; i < 4; i++) {
3147  uint8_t *pp = p->filter[i];
3148  unsigned *c = s->counts.filter[i];
3149 
3150  adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3151  adapt_prob(&pp[1], c[1], c[2], 20, 128);
3152  }
3153  }
3154 
3155  // inter modes
3156  for (i = 0; i < 7; i++) {
3157  uint8_t *pp = p->mv_mode[i];
3158  unsigned *c = s->counts.mv_mode[i];
3159 
3160  adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3161  adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3162  adapt_prob(&pp[2], c[1], c[3], 20, 128);
3163  }
3164 
3165  // mv joints
3166  {
3167  uint8_t *pp = p->mv_joint;
3168  unsigned *c = s->counts.mv_joint;
3169 
3170  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3171  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3172  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3173  }
3174 
3175  // mv components
3176  for (i = 0; i < 2; i++) {
3177  uint8_t *pp;
3178  unsigned *c, (*c2)[2], sum;
3179 
3180  adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3181  s->counts.mv_comp[i].sign[1], 20, 128);
3182 
3183  pp = p->mv_comp[i].classes;
3184  c = s->counts.mv_comp[i].classes;
3185  sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3186  adapt_prob(&pp[0], c[0], sum, 20, 128);
3187  sum -= c[1];
3188  adapt_prob(&pp[1], c[1], sum, 20, 128);
3189  sum -= c[2] + c[3];
3190  adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3191  adapt_prob(&pp[3], c[2], c[3], 20, 128);
3192  sum -= c[4] + c[5];
3193  adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3194  adapt_prob(&pp[5], c[4], c[5], 20, 128);
3195  sum -= c[6];
3196  adapt_prob(&pp[6], c[6], sum, 20, 128);
3197  adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3198  adapt_prob(&pp[8], c[7], c[8], 20, 128);
3199  adapt_prob(&pp[9], c[9], c[10], 20, 128);
3200 
3201  adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3202  s->counts.mv_comp[i].class0[1], 20, 128);
3203  pp = p->mv_comp[i].bits;
3204  c2 = s->counts.mv_comp[i].bits;
3205  for (j = 0; j < 10; j++)
3206  adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3207 
3208  for (j = 0; j < 2; j++) {
3209  pp = p->mv_comp[i].class0_fp[j];
3210  c = s->counts.mv_comp[i].class0_fp[j];
3211  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3212  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3213  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3214  }
3215  pp = p->mv_comp[i].fp;
3216  c = s->counts.mv_comp[i].fp;
3217  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3218  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3219  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3220 
3221  if (s->highprecisionmvs) {
3222  adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3223  s->counts.mv_comp[i].class0_hp[1], 20, 128);
3224  adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3225  s->counts.mv_comp[i].hp[1], 20, 128);
3226  }
3227  }
3228 
3229  // y intra modes
3230  for (i = 0; i < 4; i++) {
3231  uint8_t *pp = p->y_mode[i];
3232  unsigned *c = s->counts.y_mode[i], sum, s2;
3233 
3234  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3235  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3236  sum -= c[TM_VP8_PRED];
3237  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3238  sum -= c[VERT_PRED];
3239  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3240  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3241  sum -= s2;
3242  adapt_prob(&pp[3], s2, sum, 20, 128);
3243  s2 -= c[HOR_PRED];
3244  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3245  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3246  sum -= c[DIAG_DOWN_LEFT_PRED];
3247  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3248  sum -= c[VERT_LEFT_PRED];
3249  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3250  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3251  }
3252 
3253  // uv intra modes
3254  for (i = 0; i < 10; i++) {
3255  uint8_t *pp = p->uv_mode[i];
3256  unsigned *c = s->counts.uv_mode[i], sum, s2;
3257 
3258  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3259  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3260  sum -= c[TM_VP8_PRED];
3261  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3262  sum -= c[VERT_PRED];
3263  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3264  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3265  sum -= s2;
3266  adapt_prob(&pp[3], s2, sum, 20, 128);
3267  s2 -= c[HOR_PRED];
3268  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3269  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3270  sum -= c[DIAG_DOWN_LEFT_PRED];
3271  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3272  sum -= c[VERT_LEFT_PRED];
3273  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3274  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3275  }
3276 }
3277 
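/* Top-level decode of one frame: parse the headers, pick a spare buffer
 * from the fb[] pool, decode each tile row (re-initializing the per-tile
 * range coders from the tile size prefixes), loopfilter row by row, then
 * adapt probabilities and update the reference slots. */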
3278 static int vp9_decode_frame(AVCodecContext *ctx, void *out_pic,
3279  int *got_frame, const uint8_t *data, int size)
3280 {
3281  VP9Context *s = ctx->priv_data;
3282  int res, tile_row, tile_col, i, ref, row, col;
3283  ptrdiff_t yoff = 0, uvoff = 0;
3284  //AVFrame *prev_frame = s->f; // for segmentation map
3285 
3286  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3287  return res;
3288  } else if (res == 0) {
3289  if (!s->refs[ref]) {
3290  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3291  return AVERROR_INVALIDDATA;
3292  }
3293  if ((res = av_frame_ref(out_pic, s->refs[ref])) < 0)
3294  return res;
3295  *got_frame = 1;
3296  return 0;
3297  }
3298  data += res;
3299  size -= res;
3300 
3301  // discard old references
3302  for (i = 0; i < 10; i++) {
3303  AVFrame *f = s->fb[i];
3304  if (f->data[0] && f != s->f &&
3305  f != s->refs[0] && f != s->refs[1] &&
3306  f != s->refs[2] && f != s->refs[3] &&
3307  f != s->refs[4] && f != s->refs[5] &&
3308  f != s->refs[6] && f != s->refs[7])
3309  av_frame_unref(f);
3310  }
3311 
3312  // find unused reference
3313  for (i = 0; i < 10; i++)
3314  if (!s->fb[i]->data[0])
3315  break;
3316  av_assert0(i < 10);
3317  s->f = s->fb[i];
3318  if ((res = ff_get_buffer(ctx, s->f,
3319  s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
3320  return res;
3321  s->f->key_frame = s->keyframe;
3322  s->f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3323 
3324  // main tile decode loop
3325  memset(s->above_partition_ctx, 0, s->cols);
3326  memset(s->above_skip_ctx, 0, s->cols);
3327  if (s->keyframe || s->intraonly) {
3328  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3329  } else {
3330  memset(s->above_mode_ctx, NEARESTMV, s->cols);
3331  }
3332  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3333  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3334  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3335  memset(s->above_segpred_ctx, 0, s->cols);
3336  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3337  set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3338  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3339  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3340  unsigned tile_size;
3341 
3342  if (tile_col == s->tiling.tile_cols - 1 &&
3343  tile_row == s->tiling.tile_rows - 1) {
3344  tile_size = size;
3345  } else {
3346  tile_size = AV_RB32(data);
3347  data += 4;
3348  size -= 4;
3349  }
3350  if (tile_size > size)
3351  return AVERROR_INVALIDDATA;
3352  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3353  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
3354  return AVERROR_INVALIDDATA;
3355  data += tile_size;
3356  size -= tile_size;
3357  }
3358 
3359  for (row = s->tiling.tile_row_start;
3360  row < s->tiling.tile_row_end;
3361  row += 8, yoff += s->f->linesize[0] * 64,
3362  uvoff += s->f->linesize[1] * 32) {
3363  struct VP9Filter *lflvl_ptr = s->lflvl;
3364  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3365 
3366  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3367  set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3368  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3369 
3370  memset(s->left_partition_ctx, 0, 8);
3371  memset(s->left_skip_ctx, 0, 8);
3372  if (s->keyframe || s->intraonly) {
3373  memset(s->left_mode_ctx, DC_PRED, 16);
3374  } else {
3375  memset(s->left_mode_ctx, NEARESTMV, 8);
3376  }
3377  memset(s->left_y_nnz_ctx, 0, 16);
3378  memset(s->left_uv_nnz_ctx, 0, 16);
3379  memset(s->left_segpred_ctx, 0, 8);
3380 
3381  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3382  for (col = s->tiling.tile_col_start;
3383  col < s->tiling.tile_col_end;
3384  col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3385  // FIXME integrate with lf code (i.e. zero after each
3386  // use, similar to invtxfm coefficients, or similar)
3387  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3388 
3389  if ((res = decode_sb(ctx, row, col, lflvl_ptr,
3390  yoff2, uvoff2, BL_64X64)) < 0)
3391  return res;
3392  }
3393  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3394  }
3395 
3396  // backup pre-loopfilter reconstruction data for intra
3397  // prediction of next row of sb64s
3398  if (row + 8 < s->rows) {
3399  memcpy(s->intra_pred_data[0],
3400  s->f->data[0] + yoff + 63 * s->f->linesize[0],
3401  8 * s->cols);
3402  memcpy(s->intra_pred_data[1],
3403  s->f->data[1] + uvoff + 31 * s->f->linesize[1],
3404  4 * s->cols);
3405  memcpy(s->intra_pred_data[2],
3406  s->f->data[2] + uvoff + 31 * s->f->linesize[2],
3407  4 * s->cols);
3408  }
3409 
3410  // loopfilter one row
3411  if (s->filter.level) {
3412  yoff2 = yoff;
3413  uvoff2 = uvoff;
3414  lflvl_ptr = s->lflvl;
3415  for (col = 0; col < s->cols;
3416  col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3417  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3418  }
3419  }
3420  }
3421  }
3422 
3423  // bw adaptivity (or, in parallel decoding mode, fw adaptivity:
3424  // probability maintenance between frames)
3425  if (s->refreshctx) {
3426  if (s->parallelmode) {
3427  int i, j, k, l, m;
3428 
3429  for (i = 0; i < 4; i++)
3430  for (j = 0; j < 2; j++)
3431  for (k = 0; k < 2; k++)
3432  for (l = 0; l < 6; l++)
3433  for (m = 0; m < 6; m++)
3434  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3435  s->prob.coef[i][j][k][l][m], 3);
3436  s->prob_ctx[s->framectxid].p = s->prob.p;
3437  } else {
3438  adapt_probs(s);
3439  }
3440  }
3441  FFSWAP(struct VP9mvrefPair *, s->mv[0], s->mv[1]);
3442 
3443  // ref frame setup
3444  for (i = 0; i < 8; i++)
3445  if (s->refreshrefmask & (1 << i))
3446  s->refs[i] = s->f;
3447 
3448  if (!s->invisible) {
3449  if ((res = av_frame_ref(out_pic, s->f)) < 0)
3450  return res;
3451  *got_frame = 1;
3452  }
3453 
3454  return 0;
3455 }
3456 
3457 static int vp9_decode_packet(AVCodecContext *avctx, void *out_pic,
3458  int *got_frame, AVPacket *avpkt)
3459 {
3460  const uint8_t *data = avpkt->data;
3461  int size = avpkt->size, marker, res;
3462 
3463  // read superframe index - this is a collection of individual frames that
3464  // together lead to one visible frame
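 // layout: frame1 .. frameN, marker byte, N little-endian sizes of 1-4
 // bytes each, marker byte again. The marker is 110RRNNN, where RR+1 is
 // the number of bytes per size and NNN+1 the frame count; e.g. marker
 // 0xc9 means 2 frames with 2-byte sizes, so idx_sz = 2 + 2*2 = 6 bytes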
3465  av_assert1(size > 0); // without CODEC_CAP_DELAY, this is implied
3466  marker = data[size - 1];
3467  if ((marker & 0xe0) == 0xc0) {
3468  int nbytes = 1 + ((marker >> 3) & 0x3);
3469  int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;
3470 
3471  if (size >= idx_sz && data[size - idx_sz] == marker) {
3472  const uint8_t *idx = data + size + 1 - idx_sz;
3473  switch (nbytes) {
3474 #define case_n(a, rd) \
3475  case a: \
3476  while (n_frames--) { \
3477  int sz = rd; \
3478  idx += a; \
3479  if (sz > size) { \
3480  av_log(avctx, AV_LOG_ERROR, \
3481  "Superframe packet size too big: %d > %d\n", \
3482  sz, size); \
3483  return AVERROR_INVALIDDATA; \
3484  } \
3485  res = vp9_decode_frame(avctx, out_pic, got_frame, \
3486  data, sz); \
3487  if (res < 0) \
3488  return res; \
3489  data += sz; \
3490  size -= sz; \
3491  } \
3492  break;
3493  case_n(1, *idx);
3494  case_n(2, AV_RL16(idx));
3495  case_n(3, AV_RL24(idx));
3496  case_n(4, AV_RL32(idx));
3497  }
3498  return avpkt->size;
3499  }
3500  }
3501  // if we get here, there was no valid superframe index, i.e. this is just
3502  // a single whole frame; decode it as such from the complete input buffer
3503  if ((res = vp9_decode_frame(avctx, out_pic, got_frame, data, size)) < 0)
3504  return res;
3505  return avpkt->size;
3506 }
3507 
3508 static void vp9_decode_flush(AVCodecContext *ctx)
3509 {
3510  VP9Context *s = ctx->priv_data;
3511  int i;
3512 
3513  for (i = 0; i < 10; i++)
3514  if (s->fb[i]->data[0])
3515  av_frame_unref(s->fb[i]);
3516  for (i = 0; i < 8; i++)
3517  s->refs[i] = NULL;
3518  s->f = NULL;
3519 }
3520 
3521 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3522 {
3523  VP9Context *s = ctx->priv_data;
3524  int i;
3525 
3526  ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3527  ff_vp9dsp_init(&s->dsp);
3528  ff_videodsp_init(&s->vdsp, 8);
3529  for (i = 0; i < 10; i++) {
3530  s->fb[i] = av_frame_alloc();
3531  if (!s->fb[i]) {
3532  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3533  return AVERROR(ENOMEM);
3534  }
3535  }
3536  s->filter.sharpness = -1;
3537 
3538  return 0;
3539 }
3540 
3541 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3542 {
3543  VP9Context *s = ctx->priv_data;
3544  int i;
3545 
3546  for (i = 0; i < 10; i++) {
3547  if (s->fb[i]->data[0])
3548  av_frame_unref(s->fb[i]);
3549  av_frame_free(&s->fb[i]);
3550  }
3551  av_freep(&s->above_partition_ctx);
3552  s->above_skip_ctx = s->above_txfm_ctx = s->above_mode_ctx = NULL;
3553  s->above_y_nnz_ctx = s->above_uv_nnz_ctx[0] = s->above_uv_nnz_ctx[1] = NULL;
3554  s->intra_pred_data[0] = s->intra_pred_data[1] = s->intra_pred_data[2] = NULL;
3555  s->above_segpred_ctx = s->above_intra_ctx = s->above_comp_ctx = NULL;
3556  s->above_ref_ctx = s->above_filter_ctx = NULL;
3557  s->above_mv_ctx = NULL;
3558  s->segmentation_map = NULL;
3559  s->mv[0] = s->mv[1] = NULL;
3560  s->lflvl = NULL;
3561  av_freep(&s->c_b);
3562  s->c_b_size = 0;
3563 
3564  return 0;
3565 }
3566 
3568  .name = "vp9",
3569  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
3570  .type = AVMEDIA_TYPE_VIDEO,
3571  .id = AV_CODEC_ID_VP9,
3572  .priv_data_size = sizeof(VP9Context),
3573  .init = vp9_decode_init,
3574  .close = vp9_decode_free,
3575  .decode = vp9_decode_packet,
3576  .capabilities = CODEC_CAP_DR1,
3577  .flush = vp9_decode_flush,
3578 };