vp9.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "avcodec.h"
25 #include "get_bits.h"
26 #include "internal.h"
27 #include "thread.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dsp.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
35 
36 #define VP9_SYNCCODE 0x498342
37 
38 enum CompPredMode {
39  PRED_SINGLEREF,
40  PRED_COMPREF,
41  PRED_SWITCHABLE,
42 };
43 
44 enum BlockLevel {
45  BL_64X64,
46  BL_32X32,
47  BL_16X16,
48  BL_8X8,
49 };
50 
51 enum BlockSize {
52  BS_64x64,
53  BS_64x32,
54  BS_32x64,
55  BS_32x32,
56  BS_32x16,
57  BS_16x32,
58  BS_16x16,
59  BS_16x8,
60  BS_8x16,
61  BS_8x8,
62  BS_8x4,
63  BS_4x8,
64  BS_4x4,
65  N_BS_SIZES,
66 };
67 
68 struct VP9mvrefPair {
69  VP56mv mv[2];
70  int8_t ref[2];
71 };
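// One VP9mvrefPair is stored per 8x8 block of the whole frame: mv[] holds
// the (up to) two motion vectors of that block and ref[] the corresponding
// reference indices, with a negative ref meaning "unused" (cf. the
// mv->ref[x] >= 0 checks in find_ref_mvs() below).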
72 
73 typedef struct VP9Frame {
74  ThreadFrame tf;
75  AVBufferRef *extradata;
76  uint8_t *segmentation_map;
77  struct VP9mvrefPair *mv;
78  int uses_2pass;
79 } VP9Frame;
80 
81 struct VP9Filter {
82  uint8_t level[8 * 8];
83  uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84  [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
85 };
86 
87 typedef struct VP9Block {
88  uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89  enum FilterMode filter;
90  VP56mv mv[4 /* b_idx */][2 /* ref */];
91  enum BlockSize bs;
92  enum TxfmMode tx, uvtx;
93  enum BlockLevel bl;
94  enum BlockPartition bp;
95 } VP9Block;
96 
97 typedef struct VP9Context {
98  VP9DSPContext dsp;
99  VideoDSPContext vdsp;
100  GetBitContext gb;
101  VP56RangeCoder c;
102  VP56RangeCoder *c_b;
103  unsigned c_b_size;
104  VP9Block *b_base, *b;
105  int pass;
106  int row, row7, col, col7;
107  uint8_t *dst[3];
108  ptrdiff_t y_stride, uv_stride;
109 
110  // bitstream header
111  uint8_t keyframe, last_keyframe;
112  uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
113  uint8_t invisible;
114  uint8_t use_last_frame_mvs;
115  uint8_t errorres;
116  uint8_t ss_h, ss_v;
117  uint8_t intraonly;
118  uint8_t resetctx;
119  uint8_t refreshrefmask;
120  uint8_t highprecisionmvs;
121  enum FilterMode filtermode;
122  uint8_t allowcompinter;
123  uint8_t fixcompref;
124  uint8_t refreshctx;
125  uint8_t parallelmode;
126  uint8_t framectxid;
127  uint8_t refidx[3];
128  uint8_t signbias[3];
129  uint8_t varcompref[2];
130  ThreadFrame refs[8], next_refs[8];
131 #define CUR_FRAME 0
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
134  VP9Frame frames[3];
135 
136  struct {
137  uint8_t level;
138  int8_t sharpness;
139  uint8_t lim_lut[64];
140  uint8_t mblim_lut[64];
141  } filter;
142  struct {
143  uint8_t enabled;
144  int8_t mode[2];
145  int8_t ref[4];
146  } lf_delta;
147  uint8_t yac_qi;
148  int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
149  uint8_t lossless;
150 #define MAX_SEGMENT 8
151  struct {
152  uint8_t enabled;
153  uint8_t temporal;
154  uint8_t absolute_vals;
155  uint8_t update_map;
156  uint8_t ignore_refmap;
157  struct {
158  uint8_t q_enabled;
159  uint8_t lf_enabled;
160  uint8_t ref_enabled;
161  uint8_t skip_enabled;
162  uint8_t ref_val;
163  int16_t q_val;
164  int8_t lf_val;
165  int16_t qmul[2][2];
166  uint8_t lflvl[4][2];
167  } feat[MAX_SEGMENT];
168  } segmentation;
169  struct {
170  unsigned log2_tile_cols, log2_tile_rows;
171  unsigned tile_cols, tile_rows;
172  unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
173  } tiling;
174  unsigned sb_cols, sb_rows, rows, cols;
175  struct {
176  prob_context p;
177  uint8_t coef[4][2][2][6][6][3];
178  } prob_ctx[4];
179  struct {
180  prob_context p;
181  uint8_t coef[4][2][2][6][6][11];
182  uint8_t seg[7];
183  uint8_t segpred[3];
184  } prob;
185  struct {
186  unsigned y_mode[4][10];
187  unsigned uv_mode[10][10];
188  unsigned filter[4][3];
189  unsigned mv_mode[7][4];
190  unsigned intra[4][2];
191  unsigned comp[5][2];
192  unsigned single_ref[5][2][2];
193  unsigned comp_ref[5][2];
194  unsigned tx32p[2][4];
195  unsigned tx16p[2][3];
196  unsigned tx8p[2][2];
197  unsigned skip[3][2];
198  unsigned mv_joint[4];
199  struct {
200  unsigned sign[2];
201  unsigned classes[11];
202  unsigned class0[2];
203  unsigned bits[10][2];
204  unsigned class0_fp[2][4];
205  unsigned fp[4];
206  unsigned class0_hp[2];
207  unsigned hp[2];
208  } mv_comp[2];
209  unsigned partition[4][4][4];
210  unsigned coef[4][2][2][6][6][3];
211  unsigned eob[4][2][2][6][6][2];
212  } counts;
213  enum TxfmMode txfmmode;
214  enum CompPredMode comppredmode;
215 
216  // contextual (left/above) cache
217  DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218  DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219  DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220  DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221  DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222  DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223  DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224  DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225  DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226  DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227  DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228  DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
229  uint8_t *above_partition_ctx;
230  uint8_t *above_mode_ctx;
231  // FIXME maybe merge some of the below in a flags field?
232  uint8_t *above_y_nnz_ctx;
233  uint8_t *above_uv_nnz_ctx[2];
234  uint8_t *above_skip_ctx; // 1bit
235  uint8_t *above_txfm_ctx; // 2bit
236  uint8_t *above_segpred_ctx; // 1bit
237  uint8_t *above_intra_ctx; // 1bit
238  uint8_t *above_comp_ctx; // 1bit
239  uint8_t *above_ref_ctx; // 2bit
240  uint8_t *above_filter_ctx;
241  VP56mv (*above_mv_ctx)[2];
242 
243  // whole-frame cache
244  uint8_t *intra_pred_data[3];
245  struct VP9Filter *lflvl;
246  DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
247 
248  // block reconstruction intermediates
249  int block_alloc_using_2pass;
250  int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251  uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252  struct { int x, y; } min_mv, max_mv;
253  DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254  DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
255  uint16_t mvscale[3][2];
256  uint8_t mvstep[3][2];
257 } VP9Context;
258 
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260  {
261  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263  }, {
264  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
266  }
267 };
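// Block width/height lookup: bwh_tab[0] is in units of 4x4 blocks and
// bwh_tab[1] in units of 8x8 blocks. E.g. for BS_16x8, bwh_tab[0][BS_16x8]
// is { 4, 2 } (16x8 pixels = 4x2 blocks of 4x4) and bwh_tab[1][BS_16x8] is
// { 2, 1 }; the sub-8x8 sizes all round up to { 1, 1 } in 8x8 units.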
268 
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
270 {
271  VP9Context *s = ctx->priv_data;
272  int ret, sz;
273 
274  if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
275  return ret;
276  sz = 64 * s->sb_cols * s->sb_rows;
277  if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278  ff_thread_release_buffer(ctx, &f->tf);
279  return AVERROR(ENOMEM);
280  }
281 
282  f->segmentation_map = f->extradata->data;
283  f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
284 
285  return 0;
286 }
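// The per-frame extradata buffer allocated above is laid out as sz bytes of
// segmentation map (one uint8_t per 8x8 block, 64 per superblock) followed
// by sz VP9mvrefPair entries, which is why f->mv is pointed at
// extradata->data + sz.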
287 
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
289 {
290  ff_thread_release_buffer(ctx, &f->tf);
291  av_buffer_unref(&f->extradata);
292 }
293 
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
295 {
296  int res;
297 
298  if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
299  return res;
300  } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301  vp9_unref_frame(ctx, dst);
302  return AVERROR(ENOMEM);
303  }
304 
305  dst->segmentation_map = src->segmentation_map;
306  dst->mv = src->mv;
307  dst->uses_2pass = src->uses_2pass;
308 
309  return 0;
310 }
311 
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
313 {
314  VP9Context *s = ctx->priv_data;
315  uint8_t *p;
316  int bytesperpixel = s->bytesperpixel;
317 
318  av_assert0(w > 0 && h > 0);
319 
320  if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
321  return 0;
322 
323  ctx->width = w;
324  ctx->height = h;
325  ctx->pix_fmt = fmt;
326  s->sb_cols = (w + 63) >> 6;
327  s->sb_rows = (h + 63) >> 6;
328  s->cols = (w + 7) >> 3;
329  s->rows = (h + 7) >> 3;
330 
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332  av_freep(&s->intra_pred_data[0]);
333  // FIXME we slightly over-allocate here for subsampled chroma, but a little
334  // bit of padding shouldn't affect performance...
335  p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336  sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
337  if (!p)
338  return AVERROR(ENOMEM);
339  assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340  assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341  assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342  assign(s->above_y_nnz_ctx, uint8_t *, 16);
343  assign(s->above_mode_ctx, uint8_t *, 16);
344  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345  assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346  assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347  assign(s->above_partition_ctx, uint8_t *, 8);
348  assign(s->above_skip_ctx, uint8_t *, 8);
349  assign(s->above_txfm_ctx, uint8_t *, 8);
350  assign(s->above_segpred_ctx, uint8_t *, 8);
351  assign(s->above_intra_ctx, uint8_t *, 8);
352  assign(s->above_comp_ctx, uint8_t *, 8);
353  assign(s->above_ref_ctx, uint8_t *, 8);
354  assign(s->above_filter_ctx, uint8_t *, 8);
355  assign(s->lflvl, struct VP9Filter *, 1);
356 #undef assign
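// Note on the layout above: all the above_* context arrays and lflvl live
// in a single arena sized per superblock column; assign() carves the arena
// into consecutive slices of s->sb_cols * n entries each, so one malloc
// (and one av_freep() of intra_pred_data[0]) covers all of them.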
357 
358  // these will be re-allocated a little later
359  av_freep(&s->b_base);
360  av_freep(&s->block_base);
361 
362  if (s->bpp != s->last_bpp) {
363  ff_vp9dsp_init(&s->dsp, s->bpp);
364  ff_videodsp_init(&s->vdsp, s->bpp);
365  s->last_bpp = s->bpp;
366  }
367 
368  return 0;
369 }
370 
371 static int update_block_buffers(AVCodecContext *ctx)
372 {
373  VP9Context *s = ctx->priv_data;
374  int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
375 
376  if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
377  return 0;
378 
379  av_free(s->b_base);
380  av_free(s->block_base);
381  chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382  chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383  if (s->frames[CUR_FRAME].uses_2pass) {
384  int sbs = s->sb_cols * s->sb_rows;
385 
386  s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387  s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388  16 * 16 + 2 * chroma_eobs) * sbs);
389  if (!s->b_base || !s->block_base)
390  return AVERROR(ENOMEM);
391  s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392  s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393  s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394  s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
396  } else {
397  s->b_base = av_malloc(sizeof(VP9Block));
398  s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399  16 * 16 + 2 * chroma_eobs);
400  if (!s->b_base || !s->block_base)
401  return AVERROR(ENOMEM);
402  s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403  s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404  s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405  s->uveob_base[0] = s->eob_base + 16 * 16;
406  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
407  }
408  s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
409 
410  return 0;
411 }
412 
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static int get_sbits_inv(GetBitContext *gb, int n)
415 {
416  int v = get_bits(gb, n);
417  return get_bits1(gb) ? -v : v;
418 }
419 
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
421 {
422  return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
423 }
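// Worked example: with m = 10, v = 0, 1, 2, 3, 4 map to 10, 9, 11, 8, 12 -
// even v steps up from m, odd v steps down, so a small v means "close to
// m". Once v > 2 * m the zig-zag around m is exhausted and v is taken
// as an absolute value.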
424 
425 // differential forward probability updates
426 static int update_prob(VP56RangeCoder *c, int p)
427 {
428  static const int inv_map_table[255] = {
429  7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430  189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432  25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433  40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434  55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435  70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436  86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437  101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438  116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439  131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440  146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441  161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442  177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444  207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445  222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446  237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
447  252, 253, 253,
448  };
449  int d;
450 
451  /* This code performs a differential probability update. For a
452  * current probability A in the range [1, 255], the difference to any
453  * new probability lies in the range [1 - A, 255 - A]. Part of that
454  * (absolute) range exists on both the positive and the negative side,
455  * while the rest exists on one side only. The shared part is coded
456  * differentially, i.e. doubled, with the lowest bit carrying the
457  * sign, and the one-sided part is then coded on top of that. The
458  * resulting absolute difference again lies in [0, 254], but a bigger
459  * value in this range means we are further away from the original
460  * value A, so it can be coded as a VLC, since higher values are
461  * increasingly unlikely. The first 20 values in inv_map_table[] allow
462  * 'cheap, rough' updates, while the 'fine, exact' updates sit further
463  * down the range, which adds one extra dimension to this differential
464  * update model. */
465 
466  if (!vp8_rac_get(c)) {
467  d = vp8_rac_get_uint(c, 4) + 0;
468  } else if (!vp8_rac_get(c)) {
469  d = vp8_rac_get_uint(c, 4) + 16;
470  } else if (!vp8_rac_get(c)) {
471  d = vp8_rac_get_uint(c, 5) + 32;
472  } else {
473  d = vp8_rac_get_uint(c, 7);
474  if (d >= 65)
475  d = (d << 1) - 65 + vp8_rac_get(c);
476  d += 64;
477  av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
478  }
479 
480  return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
481  255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
482 }
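/* Bitstream layout of the delta VLC decoded above (x = payload bits):
 *   0 xxxx          -> d in [0, 15]
 *   10 xxxx         -> d in [16, 31]
 *   110 xxxxx       -> d in [32, 63]
 *   111 xxxxxxx [x] -> d in [64, 254] (the extra bit is only read for
 *                      7-bit values >= 65, doubling their range)
 * so inv_map_table[] needs exactly 255 entries, one per possible d. */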
483 
484 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
485 {
486  static const enum AVColorSpace colorspaces[8] = {
487  AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
488  AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
489  };
490  VP9Context *s = ctx->priv_data;
491  enum AVPixelFormat res;
492  int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
493 
494  s->bpp_index = bits;
495  s->bpp = 8 + bits * 2;
496  s->bytesperpixel = (7 + s->bpp) >> 3;
497  ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
498  if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
499  static const enum AVPixelFormat pix_fmt_rgb[3] = {
500  AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
501  };
502  if (ctx->profile & 1) {
503  s->ss_h = s->ss_v = 0; // RGB is never chroma-subsampled
504  res = pix_fmt_rgb[bits];
505  ctx->color_range = AVCOL_RANGE_JPEG;
506  } else {
507  av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
508  ctx->profile);
509  return AVERROR_INVALIDDATA;
510  }
511  } else {
512  static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
513  { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
514  { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
515  { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
516  { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
517  { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
518  { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
519  };
520  ctx->color_range = AVCOL_RANGE_MPEG;
521  if (ctx->profile & 1) {
522  s->ss_h = get_bits1(&s->gb);
523  s->ss_v = get_bits1(&s->gb);
524  if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
525  av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
526  ctx->profile);
527  return AVERROR_INVALIDDATA;
528  } else if (get_bits1(&s->gb)) {
529  av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
530  ctx->profile);
531  return AVERROR_INVALIDDATA;
532  }
533  } else {
534  s->ss_h = s->ss_v = 1;
535  res = pix_fmt_for_ss[bits][1][1];
536  }
537  }
538 
539  return res;
540 }
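// Example of the bit-depth mapping above: profile 0/1 streams are always
// 8 bpp (bits = 0), while profile 2/3 read one bit to choose 10 or 12 bpp,
// giving s->bpp = 8 + 2 * bits and s->bytesperpixel = (7 + s->bpp) >> 3,
// i.e. 1 byte per sample for 8 bpp and 2 bytes for 10/12 bpp.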
541 
542 static int decode_frame_header(AVCodecContext *ctx,
543  const uint8_t *data, int size, int *ref)
544 {
545  VP9Context *s = ctx->priv_data;
546  int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
547  enum AVPixelFormat fmt = ctx->pix_fmt;
548  int last_invisible;
549  const uint8_t *data2;
550 
551  /* general header */
552  if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
553  av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
554  return res;
555  }
556  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
557  av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
558  return AVERROR_INVALIDDATA;
559  }
560  ctx->profile = get_bits1(&s->gb);
561  ctx->profile |= get_bits1(&s->gb) << 1;
562  if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
563  if (ctx->profile > 3) {
564  av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
565  return AVERROR_INVALIDDATA;
566  }
567  if (get_bits1(&s->gb)) {
568  *ref = get_bits(&s->gb, 3);
569  return 0;
570  }
571  s->last_keyframe = s->keyframe;
572  s->keyframe = !get_bits1(&s->gb);
573  last_invisible = s->invisible;
574  s->invisible = !get_bits1(&s->gb);
575  s->errorres = get_bits1(&s->gb);
576  s->use_last_frame_mvs = !s->errorres && !last_invisible;
577  if (s->keyframe) {
578  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
579  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
580  return AVERROR_INVALIDDATA;
581  }
582  if ((fmt = read_colorspace_details(ctx)) < 0)
583  return fmt;
584  // for profile 1, here follow the subsampling bits
585  s->refreshrefmask = 0xff;
586  w = get_bits(&s->gb, 16) + 1;
587  h = get_bits(&s->gb, 16) + 1;
588  if (get_bits1(&s->gb)) // display size
589  skip_bits(&s->gb, 32);
590  } else {
591  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
592  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
593  if (s->intraonly) {
594  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
595  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
596  return AVERROR_INVALIDDATA;
597  }
598  if (ctx->profile >= 1) {
599  if ((fmt = read_colorspace_details(ctx)) < 0)
600  return fmt;
601  } else {
602  s->ss_h = s->ss_v = 1;
603  s->bpp = 8;
604  s->bpp_index = 0;
605  s->bytesperpixel = 1;
606  fmt = AV_PIX_FMT_YUV420P;
607  ctx->colorspace = AVCOL_SPC_BT470BG;
608  ctx->color_range = AVCOL_RANGE_MPEG;
609  }
610  s->refreshrefmask = get_bits(&s->gb, 8);
611  w = get_bits(&s->gb, 16) + 1;
612  h = get_bits(&s->gb, 16) + 1;
613  if (get_bits1(&s->gb)) // display size
614  skip_bits(&s->gb, 32);
615  } else {
616  s->refreshrefmask = get_bits(&s->gb, 8);
617  s->refidx[0] = get_bits(&s->gb, 3);
618  s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
619  s->refidx[1] = get_bits(&s->gb, 3);
620  s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
621  s->refidx[2] = get_bits(&s->gb, 3);
622  s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
623  if (!s->refs[s->refidx[0]].f->data[0] ||
624  !s->refs[s->refidx[1]].f->data[0] ||
625  !s->refs[s->refidx[2]].f->data[0]) {
626  av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
627  return AVERROR_INVALIDDATA;
628  }
629  if (get_bits1(&s->gb)) {
630  w = s->refs[s->refidx[0]].f->width;
631  h = s->refs[s->refidx[0]].f->height;
632  } else if (get_bits1(&s->gb)) {
633  w = s->refs[s->refidx[1]].f->width;
634  h = s->refs[s->refidx[1]].f->height;
635  } else if (get_bits1(&s->gb)) {
636  w = s->refs[s->refidx[2]].f->width;
637  h = s->refs[s->refidx[2]].f->height;
638  } else {
639  w = get_bits(&s->gb, 16) + 1;
640  h = get_bits(&s->gb, 16) + 1;
641  }
642  // Note that at this point "CUR_FRAME" has not yet been reallocated
643  // for the frame being parsed, so it still holds the previously
644  // decoded (i.e. the _last_) frame
645  s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
646  s->frames[CUR_FRAME].tf.f->height == h;
647  if (get_bits1(&s->gb)) // display size
648  skip_bits(&s->gb, 32);
649  s->highprecisionmvs = get_bits1(&s->gb);
650  s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
651  get_bits(&s->gb, 2);
652  s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
653  s->signbias[0] != s->signbias[2]);
654  if (s->allowcompinter) {
655  if (s->signbias[0] == s->signbias[1]) {
656  s->fixcompref = 2;
657  s->varcompref[0] = 0;
658  s->varcompref[1] = 1;
659  } else if (s->signbias[0] == s->signbias[2]) {
660  s->fixcompref = 1;
661  s->varcompref[0] = 0;
662  s->varcompref[1] = 2;
663  } else {
664  s->fixcompref = 0;
665  s->varcompref[0] = 1;
666  s->varcompref[1] = 2;
667  }
668  }
669 
670  for (i = 0; i < 3; i++) {
671  AVFrame *ref = s->refs[s->refidx[i]].f;
672  int refw = ref->width, refh = ref->height;
673 
674  if (ref->format != fmt) {
675  av_log(ctx, AV_LOG_ERROR,
676  "Ref pixfmt (%s) did not match current frame (%s)",
678  av_get_pix_fmt_name(fmt));
679  return AVERROR_INVALIDDATA;
680  } else if (refw == w && refh == h) {
681  s->mvscale[i][0] = s->mvscale[i][1] = 0;
682  } else {
683  if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
684  av_log(ctx, AV_LOG_ERROR,
685  "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
686  refw, refh, w, h);
687  return AVERROR_INVALIDDATA;
688  }
689  s->mvscale[i][0] = (refw << 14) / w;
690  s->mvscale[i][1] = (refh << 14) / h;
691  s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
692  s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
693  }
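// s->mvscale[] is a 14-bit fixed-point ratio between reference and
// current frame size. Example: a reference twice as wide (refw == 2 * w)
// gives (refw << 14) / w == 32768, i.e. 2.0; the dimension checks above
// restrict the ratio to [1/16, 2]. mvstep is the matching step of 16 luma
// pixels expressed in reference-frame units.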
694  }
695  }
696  }
697  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
698  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
699  s->framectxid = c = get_bits(&s->gb, 2);
700 
701  /* loopfilter header data */
702  if (s->keyframe || s->errorres || s->intraonly) {
703  // reset loopfilter defaults
704  s->lf_delta.ref[0] = 1;
705  s->lf_delta.ref[1] = 0;
706  s->lf_delta.ref[2] = -1;
707  s->lf_delta.ref[3] = -1;
708  s->lf_delta.mode[0] = 0;
709  s->lf_delta.mode[1] = 0;
710  }
711  s->filter.level = get_bits(&s->gb, 6);
712  sharp = get_bits(&s->gb, 3);
713  // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
714  // the old cache values since they are still valid
715  if (s->filter.sharpness != sharp)
716  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
717  s->filter.sharpness = sharp;
718  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
719  if (get_bits1(&s->gb)) {
720  for (i = 0; i < 4; i++)
721  if (get_bits1(&s->gb))
722  s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
723  for (i = 0; i < 2; i++)
724  if (get_bits1(&s->gb))
725  s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
726  }
727  }
728 
729  /* quantization header data */
730  s->yac_qi = get_bits(&s->gb, 8);
731  s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
732  s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
733  s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
734  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
735  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
736 
737  /* segmentation header info */
738  s->segmentation.ignore_refmap = 0;
739  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
740  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
741  for (i = 0; i < 7; i++)
742  s->prob.seg[i] = get_bits1(&s->gb) ?
743  get_bits(&s->gb, 8) : 255;
744  if ((s->segmentation.temporal = get_bits1(&s->gb))) {
745  for (i = 0; i < 3; i++)
746  s->prob.segpred[i] = get_bits1(&s->gb) ?
747  get_bits(&s->gb, 8) : 255;
748  }
749  }
750  if ((!s->segmentation.update_map || s->segmentation.temporal) &&
751  (w != s->frames[CUR_FRAME].tf.f->width ||
752  h != s->frames[CUR_FRAME].tf.f->height)) {
753  av_log(ctx, AV_LOG_WARNING,
754  "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
757  //return AVERROR_INVALIDDATA;
758  }
759 
760  if (get_bits1(&s->gb)) {
761  s->segmentation.absolute_vals = get_bits1(&s->gb);
762  for (i = 0; i < 8; i++) {
763  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
764  s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
765  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
766  s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
767  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
768  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
769  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
770  }
771  }
772  } else {
773  s->segmentation.feat[0].q_enabled = 0;
774  s->segmentation.feat[0].lf_enabled = 0;
775  s->segmentation.feat[0].skip_enabled = 0;
776  s->segmentation.feat[0].ref_enabled = 0;
777  }
778 
779  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
780  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
781  int qyac, qydc, quvac, quvdc, lflvl, sh;
782 
783  if (s->segmentation.feat[i].q_enabled) {
784  if (s->segmentation.absolute_vals)
785  qyac = s->segmentation.feat[i].q_val;
786  else
787  qyac = s->yac_qi + s->segmentation.feat[i].q_val;
788  } else {
789  qyac = s->yac_qi;
790  }
791  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
792  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
793  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
794  qyac = av_clip_uintp2(qyac, 8);
795 
796  s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
797  s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
798  s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
799  s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
800 
801  sh = s->filter.level >= 32;
802  if (s->segmentation.feat[i].lf_enabled) {
803  if (s->segmentation.absolute_vals)
804  lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
805  else
806  lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
807  } else {
808  lflvl = s->filter.level;
809  }
810  if (s->lf_delta.enabled) {
811  s->segmentation.feat[i].lflvl[0][0] =
812  s->segmentation.feat[i].lflvl[0][1] =
813  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
814  for (j = 1; j < 4; j++) {
815  s->segmentation.feat[i].lflvl[j][0] =
816  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
817  s->lf_delta.mode[0]) * (1 << sh)), 6);
818  s->segmentation.feat[i].lflvl[j][1] =
819  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820  s->lf_delta.mode[1]) * (1 << sh)), 6);
821  }
822  } else {
823  memset(s->segmentation.feat[i].lflvl, lflvl,
824  sizeof(s->segmentation.feat[i].lflvl));
825  }
826  }
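// Worked example for the loopfilter levels above: with filter.level = 40
// (so sh = 1) and lf_delta.ref[0] = 1, the intra level becomes
// av_clip_uintp2(40 + (1 << 1), 6) = 42; deltas are doubled once the base
// level reaches 32, and all results are clipped to the valid range [0, 63].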
827 
828  /* tiling info */
829  if ((res = update_size(ctx, w, h, fmt)) < 0) {
830  av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
831  return res;
832  }
833  for (s->tiling.log2_tile_cols = 0;
834  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
835  s->tiling.log2_tile_cols++) ;
836  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
837  max = FFMAX(0, max - 1);
838  while (max > s->tiling.log2_tile_cols) {
839  if (get_bits1(&s->gb))
840  s->tiling.log2_tile_cols++;
841  else
842  break;
843  }
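// Example: a 1920-pixel-wide frame has sb_cols = 30, so log2_tile_cols
// starts at 0 (tiles may be at most 64 superblocks wide) and the
// unary-coded increments above may raise it up to max = 2, i.e. between 1
// and 4 tile columns, keeping every tile at least 4 superblocks wide.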
844  s->tiling.log2_tile_rows = decode012(&s->gb);
845  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
846  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
847  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
848  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
849  sizeof(VP56RangeCoder) * s->tiling.tile_cols);
850  if (!s->c_b) {
851  av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
852  return AVERROR(ENOMEM);
853  }
854  }
855 
856  if (s->keyframe || s->errorres || s->intraonly) {
857  s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
858  s->prob_ctx[3].p = vp9_default_probs;
859  memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
860  sizeof(vp9_default_coef_probs));
861  memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
862  sizeof(vp9_default_coef_probs));
863  memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
864  sizeof(vp9_default_coef_probs));
865  memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
866  sizeof(vp9_default_coef_probs));
867  }
868 
869  // the next 16 bits are the size of the rest of the header (arith-coded)
870  size2 = get_bits(&s->gb, 16);
871  data2 = align_get_bits(&s->gb);
872  if (size2 > size - (data2 - data)) {
873  av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
874  return AVERROR_INVALIDDATA;
875  }
876  ff_vp56_init_range_decoder(&s->c, data2, size2);
877  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
878  av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
879  return AVERROR_INVALIDDATA;
880  }
881 
882  if (s->keyframe || s->intraonly) {
883  memset(s->counts.coef, 0, sizeof(s->counts.coef));
884  memset(s->counts.eob, 0, sizeof(s->counts.eob));
885  } else {
886  memset(&s->counts, 0, sizeof(s->counts));
887  }
888  // FIXME is it faster to not copy here, but do it down in the fw updates
889  // as explicit copies if the fw update is missing (and skip the copy upon
890  // fw update)?
891  s->prob.p = s->prob_ctx[c].p;
892 
893  // txfm updates
894  if (s->lossless) {
895  s->txfmmode = TX_4X4;
896  } else {
897  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
898  if (s->txfmmode == 3)
899  s->txfmmode += vp8_rac_get(&s->c);
900 
901  if (s->txfmmode == TX_SWITCHABLE) {
902  for (i = 0; i < 2; i++)
903  if (vp56_rac_get_prob_branchy(&s->c, 252))
904  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
905  for (i = 0; i < 2; i++)
906  for (j = 0; j < 2; j++)
907  if (vp56_rac_get_prob_branchy(&s->c, 252))
908  s->prob.p.tx16p[i][j] =
909  update_prob(&s->c, s->prob.p.tx16p[i][j]);
910  for (i = 0; i < 2; i++)
911  for (j = 0; j < 3; j++)
912  if (vp56_rac_get_prob_branchy(&s->c, 252))
913  s->prob.p.tx32p[i][j] =
914  update_prob(&s->c, s->prob.p.tx32p[i][j]);
915  }
916  }
917 
918  // coef updates
919  for (i = 0; i < 4; i++) {
920  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
921  if (vp8_rac_get(&s->c)) {
922  for (j = 0; j < 2; j++)
923  for (k = 0; k < 2; k++)
924  for (l = 0; l < 6; l++)
925  for (m = 0; m < 6; m++) {
926  uint8_t *p = s->prob.coef[i][j][k][l][m];
927  uint8_t *r = ref[j][k][l][m];
928  if (m >= 3 && l == 0) // dc only has 3 pt
929  break;
930  for (n = 0; n < 3; n++) {
931  if (vp56_rac_get_prob_branchy(&s->c, 252)) {
932  p[n] = update_prob(&s->c, r[n]);
933  } else {
934  p[n] = r[n];
935  }
936  }
937  p[3] = 0;
938  }
939  } else {
940  for (j = 0; j < 2; j++)
941  for (k = 0; k < 2; k++)
942  for (l = 0; l < 6; l++)
943  for (m = 0; m < 6; m++) {
944  uint8_t *p = s->prob.coef[i][j][k][l][m];
945  uint8_t *r = ref[j][k][l][m];
946  if (m > 3 && l == 0) // dc only has 3 pt
947  break;
948  memcpy(p, r, 3);
949  p[3] = 0;
950  }
951  }
952  if (s->txfmmode == i)
953  break;
954  }
955 
956  // mode updates
957  for (i = 0; i < 3; i++)
958  if (vp56_rac_get_prob_branchy(&s->c, 252))
959  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
960  if (!s->keyframe && !s->intraonly) {
961  for (i = 0; i < 7; i++)
962  for (j = 0; j < 3; j++)
963  if (vp56_rac_get_prob_branchy(&s->c, 252))
964  s->prob.p.mv_mode[i][j] =
965  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
966 
967  if (s->filtermode == FILTER_SWITCHABLE)
968  for (i = 0; i < 4; i++)
969  for (j = 0; j < 2; j++)
970  if (vp56_rac_get_prob_branchy(&s->c, 252))
971  s->prob.p.filter[i][j] =
972  update_prob(&s->c, s->prob.p.filter[i][j]);
973 
974  for (i = 0; i < 4; i++)
975  if (vp56_rac_get_prob_branchy(&s->c, 252))
976  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
977 
978  if (s->allowcompinter) {
979  s->comppredmode = vp8_rac_get(&s->c);
980  if (s->comppredmode)
981  s->comppredmode += vp8_rac_get(&s->c);
982  if (s->comppredmode == PRED_SWITCHABLE)
983  for (i = 0; i < 5; i++)
984  if (vp56_rac_get_prob_branchy(&s->c, 252))
985  s->prob.p.comp[i] =
986  update_prob(&s->c, s->prob.p.comp[i]);
987  } else {
988  s->comppredmode = PRED_SINGLEREF;
989  }
990 
991  if (s->comppredmode != PRED_COMPREF) {
992  for (i = 0; i < 5; i++) {
993  if (vp56_rac_get_prob_branchy(&s->c, 252))
994  s->prob.p.single_ref[i][0] =
995  update_prob(&s->c, s->prob.p.single_ref[i][0]);
996  if (vp56_rac_get_prob_branchy(&s->c, 252))
997  s->prob.p.single_ref[i][1] =
998  update_prob(&s->c, s->prob.p.single_ref[i][1]);
999  }
1000  }
1001 
1002  if (s->comppredmode != PRED_SINGLEREF) {
1003  for (i = 0; i < 5; i++)
1004  if (vp56_rac_get_prob_branchy(&s->c, 252))
1005  s->prob.p.comp_ref[i] =
1006  update_prob(&s->c, s->prob.p.comp_ref[i]);
1007  }
1008 
1009  for (i = 0; i < 4; i++)
1010  for (j = 0; j < 9; j++)
1011  if (vp56_rac_get_prob_branchy(&s->c, 252))
1012  s->prob.p.y_mode[i][j] =
1013  update_prob(&s->c, s->prob.p.y_mode[i][j]);
1014 
1015  for (i = 0; i < 4; i++)
1016  for (j = 0; j < 4; j++)
1017  for (k = 0; k < 3; k++)
1018  if (vp56_rac_get_prob_branchy(&s->c, 252))
1019  s->prob.p.partition[3 - i][j][k] =
1020  update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1021 
1022  // mv fields don't use the update_prob subexp model for some reason
1023  for (i = 0; i < 3; i++)
1024  if (vp56_rac_get_prob_branchy(&s->c, 252))
1025  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1026 
1027  for (i = 0; i < 2; i++) {
1028  if (vp56_rac_get_prob_branchy(&s->c, 252))
1029  s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1030 
1031  for (j = 0; j < 10; j++)
1032  if (vp56_rac_get_prob_branchy(&s->c, 252))
1033  s->prob.p.mv_comp[i].classes[j] =
1034  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1035 
1036  if (vp56_rac_get_prob_branchy(&s->c, 252))
1037  s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1038 
1039  for (j = 0; j < 10; j++)
1040  if (vp56_rac_get_prob_branchy(&s->c, 252))
1041  s->prob.p.mv_comp[i].bits[j] =
1042  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1043  }
1044 
1045  for (i = 0; i < 2; i++) {
1046  for (j = 0; j < 2; j++)
1047  for (k = 0; k < 3; k++)
1048  if (vp56_rac_get_prob_branchy(&s->c, 252))
1049  s->prob.p.mv_comp[i].class0_fp[j][k] =
1050  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1051 
1052  for (j = 0; j < 3; j++)
1053  if (vp56_rac_get_prob_branchy(&s->c, 252))
1054  s->prob.p.mv_comp[i].fp[j] =
1055  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1056  }
1057 
1058  if (s->highprecisionmvs) {
1059  for (i = 0; i < 2; i++) {
1060  if (vp56_rac_get_prob_branchy(&s->c, 252))
1061  s->prob.p.mv_comp[i].class0_hp =
1062  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1063 
1064  if (vp56_rac_get_prob_branchy(&s->c, 252))
1065  s->prob.p.mv_comp[i].hp =
1066  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1067  }
1068  }
1069  }
1070 
1071  return (data2 - data) + size2;
1072 }
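// On success the return value is the total header size in bytes: the
// byte-aligned uncompressed part (data2 - data) plus the arithmetically
// coded part (size2), which callers can use as the offset of the tile data.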
1073 
1074 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1075  VP9Context *s)
1076 {
1077  dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1078  dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1079 }
1080 
1081 static void find_ref_mvs(VP9Context *s,
1082  VP56mv *pmv, int ref, int z, int idx, int sb)
1083 {
1084  static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1085  [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1086  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1087  [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1088  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1089  [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1090  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1091  [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1092  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1093  [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1094  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1095  [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1096  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1097  [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1098  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1099  [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1100  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1101  [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1102  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1103  [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1104  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1105  [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1106  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1107  [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1108  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1109  [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1110  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1111  };
1112  VP9Block *b = s->b;
1113  int row = s->row, col = s->col, row7 = s->row7;
1114  const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1115 #define INVALID_MV 0x80008000U
1116  uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1117  int i;
1118 
1119 #define RETURN_DIRECT_MV(mv) \
1120  do { \
1121  uint32_t m = AV_RN32A(&mv); \
1122  if (!idx) { \
1123  AV_WN32A(pmv, m); \
1124  return; \
1125  } else if (mem == INVALID_MV) { \
1126  mem = m; \
1127  } else if (m != mem) { \
1128  AV_WN32A(pmv, m); \
1129  return; \
1130  } \
1131  } while (0)
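 // idx selects which distinct candidate is returned: with idx == 0 the
 // first candidate MV found is used directly; with idx == 1 (NEARMV) the
 // first one is only remembered in 'mem' and the search continues until a
 // different MV turns up, yielding the second-best candidate.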
1132 
1133  if (sb >= 0) {
1134  if (sb == 2 || sb == 1) {
1135  RETURN_DIRECT_MV(b->mv[0][z]);
1136  } else if (sb == 3) {
1137  RETURN_DIRECT_MV(b->mv[2][z]);
1138  RETURN_DIRECT_MV(b->mv[1][z]);
1139  RETURN_DIRECT_MV(b->mv[0][z]);
1140  }
1141 
1142 #define RETURN_MV(mv) \
1143  do { \
1144  if (sb > 0) { \
1145  VP56mv tmp; \
1146  uint32_t m; \
1147  av_assert2(idx == 1); \
1148  av_assert2(mem != INVALID_MV); \
1149  if (mem_sub8x8 == INVALID_MV) { \
1150  clamp_mv(&tmp, &mv, s); \
1151  m = AV_RN32A(&tmp); \
1152  if (m != mem) { \
1153  AV_WN32A(pmv, m); \
1154  return; \
1155  } \
1156  mem_sub8x8 = AV_RN32A(&mv); \
1157  } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1158  clamp_mv(&tmp, &mv, s); \
1159  m = AV_RN32A(&tmp); \
1160  if (m != mem) { \
1161  AV_WN32A(pmv, m); \
1162  } else { \
1163  /* BUG I'm pretty sure this isn't the intention */ \
1164  AV_WN32A(pmv, 0); \
1165  } \
1166  return; \
1167  } \
1168  } else { \
1169  uint32_t m = AV_RN32A(&mv); \
1170  if (!idx) { \
1171  clamp_mv(pmv, &mv, s); \
1172  return; \
1173  } else if (mem == INVALID_MV) { \
1174  mem = m; \
1175  } else if (m != mem) { \
1176  clamp_mv(pmv, &mv, s); \
1177  return; \
1178  } \
1179  } \
1180  } while (0)
1181 
1182  if (row > 0) {
1183  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1184  if (mv->ref[0] == ref) {
1185  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1186  } else if (mv->ref[1] == ref) {
1187  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1188  }
1189  }
1190  if (col > s->tiling.tile_col_start) {
1191  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1192  if (mv->ref[0] == ref) {
1193  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1194  } else if (mv->ref[1] == ref) {
1195  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1196  }
1197  }
1198  i = 2;
1199  } else {
1200  i = 0;
1201  }
1202 
1203  // previously coded MVs in this neighbourhood, using same reference frame
1204  for (; i < 8; i++) {
1205  int c = p[i][0] + col, r = p[i][1] + row;
1206 
1207  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1208  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1209 
1210  if (mv->ref[0] == ref) {
1211  RETURN_MV(mv->mv[0]);
1212  } else if (mv->ref[1] == ref) {
1213  RETURN_MV(mv->mv[1]);
1214  }
1215  }
1216  }
1217 
1218  // MV at this position in previous frame, using same reference frame
1219  if (s->use_last_frame_mvs) {
1220  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1221 
1222  if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1223  ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1224  if (mv->ref[0] == ref) {
1225  RETURN_MV(mv->mv[0]);
1226  } else if (mv->ref[1] == ref) {
1227  RETURN_MV(mv->mv[1]);
1228  }
1229  }
1230 
1231 #define RETURN_SCALE_MV(mv, scale) \
1232  do { \
1233  if (scale) { \
1234  VP56mv mv_temp = { -mv.x, -mv.y }; \
1235  RETURN_MV(mv_temp); \
1236  } else { \
1237  RETURN_MV(mv); \
1238  } \
1239  } while (0)
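 // The 'scale' argument implements reference sign-bias compensation: when
 // the candidate block's reference frame has the opposite sign bias to the
 // one being predicted, its MV points the "wrong way" in time and is
 // negated before being considered.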
1240 
1241  // previously coded MVs in this neighbourhood, using different reference frame
1242  for (i = 0; i < 8; i++) {
1243  int c = p[i][0] + col, r = p[i][1] + row;
1244 
1245  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1246  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1247 
1248  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1249  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1250  }
1251  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1252  // BUG - libvpx has this condition regardless of whether
1253  // we used the first ref MV and pre-scaling
1254  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1255  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1256  }
1257  }
1258  }
1259 
1260  // MV at this position in previous frame, using different reference frame
1261  if (s->use_last_frame_mvs) {
1262  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1263 
1264  // no need to await_progress, because we already did that above
1265  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1266  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1267  }
1268  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1269  // BUG - libvpx has this condition regardless of whether
1270  // we used the first ref MV and pre-scaling
1271  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1272  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1273  }
1274  }
1275 
1276  AV_ZERO32(pmv);
1277  clamp_mv(pmv, pmv, s);
1278 #undef INVALID_MV
1279 #undef RETURN_MV
1280 #undef RETURN_SCALE_MV
1281 }
1282 
1283 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1284 {
1285  int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1286  int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1287  s->prob.p.mv_comp[idx].classes);
1288 
1289  s->counts.mv_comp[idx].sign[sign]++;
1290  s->counts.mv_comp[idx].classes[c]++;
1291  if (c) {
1292  int m;
1293 
1294  for (n = 0, m = 0; m < c; m++) {
1295  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1296  n |= bit << m;
1297  s->counts.mv_comp[idx].bits[m][bit]++;
1298  }
1299  n <<= 3;
1300  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1301  n |= bit << 1;
1302  s->counts.mv_comp[idx].fp[bit]++;
1303  if (hp) {
1304  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1305  s->counts.mv_comp[idx].hp[bit]++;
1306  n |= bit;
1307  } else {
1308  n |= 1;
1309  // bug in libvpx - we count for bw entropy purposes even if the
1310  // bit wasn't coded
1311  s->counts.mv_comp[idx].hp[1]++;
1312  }
1313  n += 8 << c;
1314  } else {
1315  n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1316  s->counts.mv_comp[idx].class0[n]++;
1317  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1318  s->prob.p.mv_comp[idx].class0_fp[n]);
1319  s->counts.mv_comp[idx].class0_fp[n][bit]++;
1320  n = (n << 3) | (bit << 1);
1321  if (hp) {
1322  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1323  s->counts.mv_comp[idx].class0_hp[bit]++;
1324  n |= bit;
1325  } else {
1326  n |= 1;
1327  // bug in libvpx - we count for bw entropy purposes even if the
1328  // bit wasn't coded
1329  s->counts.mv_comp[idx].class0_hp[1]++;
1330  }
1331  }
1332 
1333  return sign ? -(n + 1) : (n + 1);
1334 }
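// The decoded magnitude is assembled in 1/8-pel units: for class 0,
// n = (class0 << 3) | (fp << 1) | hp, so n + 1 lies in [1, 16]; for class
// c > 0, the c integer bits, the fp/hp bits and the 8 << c offset give
// n + 1 in [(8 << c) + 1, 8 << (c + 1)]. 'sign' finally negates the result.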
1335 
1336 static void fill_mv(VP9Context *s,
1337  VP56mv *mv, int mode, int sb)
1338 {
1339  VP9Block *b = s->b;
1340 
1341  if (mode == ZEROMV) {
1342  AV_ZERO64(mv);
1343  } else {
1344  int hp;
1345 
1346  // FIXME cache this value and reuse for other subblocks
1347  find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1348  mode == NEWMV ? -1 : sb);
1349  // FIXME maybe move this code into find_ref_mvs()
1350  if ((mode == NEWMV || sb == -1) &&
1351  !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1352  if (mv[0].y & 1) {
1353  if (mv[0].y < 0)
1354  mv[0].y++;
1355  else
1356  mv[0].y--;
1357  }
1358  if (mv[0].x & 1) {
1359  if (mv[0].x < 0)
1360  mv[0].x++;
1361  else
1362  mv[0].x--;
1363  }
1364  }
1365  if (mode == NEWMV) {
1366  enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1367  s->prob.p.mv_joint);
1368 
1369  s->counts.mv_joint[j]++;
1370  if (j >= MV_JOINT_V)
1371  mv[0].y += read_mv_component(s, 0, hp);
1372  if (j & 1)
1373  mv[0].x += read_mv_component(s, 1, hp);
1374  }
1375 
1376  if (b->comp) {
1377  // FIXME cache this value and reuse for other subblocks
1378  find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1379  mode == NEWMV ? -1 : sb);
1380  if ((mode == NEWMV || sb == -1) &&
1381  !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1382  if (mv[1].y & 1) {
1383  if (mv[1].y < 0)
1384  mv[1].y++;
1385  else
1386  mv[1].y--;
1387  }
1388  if (mv[1].x & 1) {
1389  if (mv[1].x < 0)
1390  mv[1].x++;
1391  else
1392  mv[1].x--;
1393  }
1394  }
1395  if (mode == NEWMV) {
1396  enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1397  s->prob.p.mv_joint);
1398 
1399  s->counts.mv_joint[j]++;
1400  if (j >= MV_JOINT_V)
1401  mv[1].y += read_mv_component(s, 0, hp);
1402  if (j & 1)
1403  mv[1].x += read_mv_component(s, 1, hp);
1404  }
1405  }
1406  }
1407 }
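// Note the parity fixup above: when high-precision MVs are unavailable for
// this block (disabled in the header, or the magnitude reaches 64
// eighth-pels), predicted components are forced to even values by pulling
// odd components one step towards zero; only NEWMV then adds a new delta.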
1408 
1409 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1410  ptrdiff_t stride, int v)
1411 {
1412  switch (w) {
1413  case 1:
1414  do {
1415  *ptr = v;
1416  ptr += stride;
1417  } while (--h);
1418  break;
1419  case 2: {
1420  int v16 = v * 0x0101;
1421  do {
1422  AV_WN16A(ptr, v16);
1423  ptr += stride;
1424  } while (--h);
1425  break;
1426  }
1427  case 4: {
1428  uint32_t v32 = v * 0x01010101;
1429  do {
1430  AV_WN32A(ptr, v32);
1431  ptr += stride;
1432  } while (--h);
1433  break;
1434  }
1435  case 8: {
1436 #if HAVE_FAST_64BIT
1437  uint64_t v64 = v * 0x0101010101010101ULL;
1438  do {
1439  AV_WN64A(ptr, v64);
1440  ptr += stride;
1441  } while (--h);
1442 #else
1443  uint32_t v32 = v * 0x01010101;
1444  do {
1445  AV_WN32A(ptr, v32);
1446  AV_WN32A(ptr + 4, v32);
1447  ptr += stride;
1448  } while (--h);
1449 #endif
1450  break;
1451  }
1452  }
1453 }
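// Multiplying the byte value by 0x01010101 (or the 64-bit equivalent)
// broadcasts it across the word, so e.g. v = 3 with w = 4 becomes a single
// aligned 32-bit store of 0x03030303 per row instead of four byte stores.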
1454 
1455 static void decode_mode(AVCodecContext *ctx)
1456 {
1457  static const uint8_t left_ctx[N_BS_SIZES] = {
1458  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1459  };
1460  static const uint8_t above_ctx[N_BS_SIZES] = {
1461  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1462  };
1463  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1464  TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1465  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1466  };
1467  VP9Context *s = ctx->priv_data;
1468  VP9Block *b = s->b;
1469  int row = s->row, col = s->col, row7 = s->row7;
1470  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1471  int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1472  int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1473  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1474  int vref, filter_id;
1475 
1476  if (!s->segmentation.enabled) {
1477  b->seg_id = 0;
1478  } else if (s->keyframe || s->intraonly) {
1479  b->seg_id = !s->segmentation.update_map ? 0 : vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1480  } else if (!s->segmentation.update_map ||
1481  (s->segmentation.temporal &&
1483  s->prob.segpred[s->above_segpred_ctx[col] +
1484  s->left_segpred_ctx[row7]]))) {
1485  if (!s->errorres && !s->segmentation.ignore_refmap) {
1486  int pred = 8, x;
1487  uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1488 
1489  if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1490  ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1491  for (y = 0; y < h4; y++) {
1492  int idx_base = (y + row) * 8 * s->sb_cols + col;
1493  for (x = 0; x < w4; x++)
1494  pred = FFMIN(pred, refsegmap[idx_base + x]);
1495  }
1496  av_assert1(pred < 8);
1497  b->seg_id = pred;
1498  } else {
1499  b->seg_id = 0;
1500  }
1501 
1502  memset(&s->above_segpred_ctx[col], 1, w4);
1503  memset(&s->left_segpred_ctx[row7], 1, h4);
1504  } else {
1505  b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1506  s->prob.seg);
1507 
1508  memset(&s->above_segpred_ctx[col], 0, w4);
1509  memset(&s->left_segpred_ctx[row7], 0, h4);
1510  }
1511  if (s->segmentation.enabled &&
1512  (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1513  setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1514  bw4, bh4, 8 * s->sb_cols, b->seg_id);
1515  }
1516 
1517  b->skip = s->segmentation.enabled &&
1518  s->segmentation.feat[b->seg_id].skip_enabled;
1519  if (!b->skip) {
1520  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1521  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1522  s->counts.skip[c][b->skip]++;
1523  }
1524 
1525  if (s->keyframe || s->intraonly) {
1526  b->intra = 1;
1527  } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1528  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1529  } else {
1530  int c, bit;
1531 
1532  if (have_a && have_l) {
1533  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1534  c += (c == 2);
1535  } else {
1536  c = have_a ? 2 * s->above_intra_ctx[col] :
1537  have_l ? 2 * s->left_intra_ctx[row7] : 0;
1538  }
1539  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1540  s->counts.intra[c][bit]++;
1541  b->intra = !bit;
1542  }
1543 
1544  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1545  int c;
1546  if (have_a) {
1547  if (have_l) {
1548  c = (s->above_skip_ctx[col] ? max_tx :
1549  s->above_txfm_ctx[col]) +
1550  (s->left_skip_ctx[row7] ? max_tx :
1551  s->left_txfm_ctx[row7]) > max_tx;
1552  } else {
1553  c = s->above_skip_ctx[col] ? 1 :
1554  (s->above_txfm_ctx[col] * 2 > max_tx);
1555  }
1556  } else if (have_l) {
1557  c = s->left_skip_ctx[row7] ? 1 :
1558  (s->left_txfm_ctx[row7] * 2 > max_tx);
1559  } else {
1560  c = 1;
1561  }
1562  switch (max_tx) {
1563  case TX_32X32:
1564  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1565  if (b->tx) {
1566  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1567  if (b->tx == 2)
1568  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1569  }
1570  s->counts.tx32p[c][b->tx]++;
1571  break;
1572  case TX_16X16:
1573  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1574  if (b->tx)
1575  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1576  s->counts.tx16p[c][b->tx]++;
1577  break;
1578  case TX_8X8:
1579  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1580  s->counts.tx8p[c][b->tx]++;
1581  break;
1582  case TX_4X4:
1583  b->tx = TX_4X4;
1584  break;
1585  }
1586  } else {
1587  b->tx = FFMIN(max_tx, s->txfmmode);
1588  }
1589 
1590  if (s->keyframe || s->intraonly) {
1591  uint8_t *a = &s->above_mode_ctx[col * 2];
1592  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1593 
1594  b->comp = 0;
1595  if (b->bs > BS_8x8) {
1596  // FIXME the memory storage intermediates here aren't really
1597  // necessary, they're just there to make the code slightly
1598  // simpler for now
1599  b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1600  vp9_default_kf_ymode_probs[a[0]][l[0]]);
1601  if (b->bs != BS_8x4) {
1602  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1603  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1604  l[0] = a[1] = b->mode[1];
1605  } else {
1606  l[0] = a[1] = b->mode[1] = b->mode[0];
1607  }
1608  if (b->bs != BS_4x8) {
1609  b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1610  vp9_default_kf_ymode_probs[a[0]][l[1]]);
1611  if (b->bs != BS_8x4) {
1612  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1613  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1614  l[1] = a[1] = b->mode[3];
1615  } else {
1616  l[1] = a[1] = b->mode[3] = b->mode[2];
1617  }
1618  } else {
1619  b->mode[2] = b->mode[0];
1620  l[1] = a[1] = b->mode[3] = b->mode[1];
1621  }
1622  } else {
1623  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1624  vp9_default_kf_ymode_probs[*a][*l]);
1625  b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1626  // FIXME this can probably be optimized
1627  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1628  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1629  }
1630  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1631  vp9_default_kf_uvmode_probs[b->mode[3]]);
1632  } else if (b->intra) {
1633  b->comp = 0;
1634  if (b->bs > BS_8x8) {
1635  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1636  s->prob.p.y_mode[0]);
1637  s->counts.y_mode[0][b->mode[0]]++;
1638  if (b->bs != BS_8x4) {
1639  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1640  s->prob.p.y_mode[0]);
1641  s->counts.y_mode[0][b->mode[1]]++;
1642  } else {
1643  b->mode[1] = b->mode[0];
1644  }
1645  if (b->bs != BS_4x8) {
1646  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1647  s->prob.p.y_mode[0]);
1648  s->counts.y_mode[0][b->mode[2]]++;
1649  if (b->bs != BS_8x4) {
1650  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1651  s->prob.p.y_mode[0]);
1652  s->counts.y_mode[0][b->mode[3]]++;
1653  } else {
1654  b->mode[3] = b->mode[2];
1655  }
1656  } else {
1657  b->mode[2] = b->mode[0];
1658  b->mode[3] = b->mode[1];
1659  }
1660  } else {
1661  static const uint8_t size_group[10] = {
1662  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1663  };
1664  int sz = size_group[b->bs];
1665 
1666  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1667  s->prob.p.y_mode[sz]);
1668  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1669  s->counts.y_mode[sz][b->mode[3]]++;
1670  }
1671  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1672  s->prob.p.uv_mode[b->mode[3]]);
1673  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1674  } else {
1675  static const uint8_t inter_mode_ctx_lut[14][14] = {
1676  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1687  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1688  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1689  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1690  };
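 // Both LUT indices are mode-context values of the above/left blocks,
 // where 0-9 are intra modes and 10-13 the inter modes (NEARESTMV..NEWMV,
 // stored as-is in b->mode[], hence the "- 10" when counting below); the
 // looked-up value selects one of the 7 sets in s->prob.p.mv_mode.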
1691 
1692  if (s->segmentation.feat[b->seg_id].ref_enabled) {
1693  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1694  b->comp = 0;
1695  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1696  } else {
1697  // read comp_pred flag
1698  if (s->comppredmode != PRED_SWITCHABLE) {
1699  b->comp = s->comppredmode == PRED_COMPREF;
1700  } else {
1701  int c;
1702 
1703  // FIXME add intra as ref=0xff (or -1) to make these easier?
1704  if (have_a) {
1705  if (have_l) {
1706  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1707  c = 4;
1708  } else if (s->above_comp_ctx[col]) {
1709  c = 2 + (s->left_intra_ctx[row7] ||
1710  s->left_ref_ctx[row7] == s->fixcompref);
1711  } else if (s->left_comp_ctx[row7]) {
1712  c = 2 + (s->above_intra_ctx[col] ||
1713  s->above_ref_ctx[col] == s->fixcompref);
1714  } else {
1715  c = (!s->above_intra_ctx[col] &&
1716  s->above_ref_ctx[col] == s->fixcompref) ^
1717  (!s->left_intra_ctx[row7] &&
1718  s->left_ref_ctx[row & 7] == s->fixcompref);
1719  }
1720  } else {
1721  c = s->above_comp_ctx[col] ? 3 :
1722  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1723  }
1724  } else if (have_l) {
1725  c = s->left_comp_ctx[row7] ? 3 :
1726  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1727  } else {
1728  c = 1;
1729  }
1730  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1731  s->counts.comp[c][b->comp]++;
1732  }
1733 
1734  // read actual references
1735  // FIXME probably cache a few variables here to prevent repetitive
1736  // memory accesses below
1737  if (b->comp) /* two references */ {
1738  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1739 
1740  b->ref[fix_idx] = s->fixcompref;
1741  // FIXME can this codeblob be replaced by some sort of LUT?
1742  if (have_a) {
1743  if (have_l) {
1744  if (s->above_intra_ctx[col]) {
1745  if (s->left_intra_ctx[row7]) {
1746  c = 2;
1747  } else {
1748  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1749  }
1750  } else if (s->left_intra_ctx[row7]) {
1751  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1752  } else {
1753  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1754 
1755  if (refl == refa && refa == s->varcompref[1]) {
1756  c = 0;
1757  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1758  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1759  (refl == s->fixcompref && refa == s->varcompref[0])) {
1760  c = 4;
1761  } else {
1762  c = (refa == refl) ? 3 : 1;
1763  }
1764  } else if (!s->left_comp_ctx[row7]) {
1765  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1766  c = 1;
1767  } else {
1768  c = (refl == s->varcompref[1] &&
1769  refa != s->varcompref[1]) ? 2 : 4;
1770  }
1771  } else if (!s->above_comp_ctx[col]) {
1772  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1773  c = 1;
1774  } else {
1775  c = (refa == s->varcompref[1] &&
1776  refl != s->varcompref[1]) ? 2 : 4;
1777  }
1778  } else {
1779  c = (refl == refa) ? 4 : 2;
1780  }
1781  }
1782  } else {
1783  if (s->above_intra_ctx[col]) {
1784  c = 2;
1785  } else if (s->above_comp_ctx[col]) {
1786  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1787  } else {
1788  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1789  }
1790  }
1791  } else if (have_l) {
1792  if (s->left_intra_ctx[row7]) {
1793  c = 2;
1794  } else if (s->left_comp_ctx[row7]) {
1795  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1796  } else {
1797  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1798  }
1799  } else {
1800  c = 2;
1801  }
1802  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1803  b->ref[var_idx] = s->varcompref[bit];
1804  s->counts.comp_ref[c][bit]++;
1805  } else /* single reference */ {
1806  int bit, c;
1807 
1808  if (have_a && !s->above_intra_ctx[col]) {
1809  if (have_l && !s->left_intra_ctx[row7]) {
1810  if (s->left_comp_ctx[row7]) {
1811  if (s->above_comp_ctx[col]) {
1812  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1813  !s->above_ref_ctx[col]);
1814  } else {
1815  c = (3 * !s->above_ref_ctx[col]) +
1816  (!s->fixcompref || !s->left_ref_ctx[row7]);
1817  }
1818  } else if (s->above_comp_ctx[col]) {
1819  c = (3 * !s->left_ref_ctx[row7]) +
1820  (!s->fixcompref || !s->above_ref_ctx[col]);
1821  } else {
1822  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1823  }
1824  } else if (s->above_intra_ctx[col]) {
1825  c = 2;
1826  } else if (s->above_comp_ctx[col]) {
1827  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1828  } else {
1829  c = 4 * (!s->above_ref_ctx[col]);
1830  }
1831  } else if (have_l && !s->left_intra_ctx[row7]) {
1832  if (s->left_intra_ctx[row7]) {
1833  c = 2;
1834  } else if (s->left_comp_ctx[row7]) {
1835  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1836  } else {
1837  c = 4 * (!s->left_ref_ctx[row7]);
1838  }
1839  } else {
1840  c = 2;
1841  }
1842  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1843  s->counts.single_ref[c][0][bit]++;
1844  if (!bit) {
1845  b->ref[0] = 0;
1846  } else {
1847  // FIXME can this codeblob be replaced by some sort of LUT?
1848  if (have_a) {
1849  if (have_l) {
1850  if (s->left_intra_ctx[row7]) {
1851  if (s->above_intra_ctx[col]) {
1852  c = 2;
1853  } else if (s->above_comp_ctx[col]) {
1854  c = 1 + 2 * (s->fixcompref == 1 ||
1855  s->above_ref_ctx[col] == 1);
1856  } else if (!s->above_ref_ctx[col]) {
1857  c = 3;
1858  } else {
1859  c = 4 * (s->above_ref_ctx[col] == 1);
1860  }
1861  } else if (s->above_intra_ctx[col]) {
1862  if (s->left_intra_ctx[row7]) {
1863  c = 2;
1864  } else if (s->left_comp_ctx[row7]) {
1865  c = 1 + 2 * (s->fixcompref == 1 ||
1866  s->left_ref_ctx[row7] == 1);
1867  } else if (!s->left_ref_ctx[row7]) {
1868  c = 3;
1869  } else {
1870  c = 4 * (s->left_ref_ctx[row7] == 1);
1871  }
1872  } else if (s->above_comp_ctx[col]) {
1873  if (s->left_comp_ctx[row7]) {
1874  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1875  c = 3 * (s->fixcompref == 1 ||
1876  s->left_ref_ctx[row7] == 1);
1877  } else {
1878  c = 2;
1879  }
1880  } else if (!s->left_ref_ctx[row7]) {
1881  c = 1 + 2 * (s->fixcompref == 1 ||
1882  s->above_ref_ctx[col] == 1);
1883  } else {
1884  c = 3 * (s->left_ref_ctx[row7] == 1) +
1885  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1886  }
1887  } else if (s->left_comp_ctx[row7]) {
1888  if (!s->above_ref_ctx[col]) {
1889  c = 1 + 2 * (s->fixcompref == 1 ||
1890  s->left_ref_ctx[row7] == 1);
1891  } else {
1892  c = 3 * (s->above_ref_ctx[col] == 1) +
1893  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1894  }
1895  } else if (!s->above_ref_ctx[col]) {
1896  if (!s->left_ref_ctx[row7]) {
1897  c = 3;
1898  } else {
1899  c = 4 * (s->left_ref_ctx[row7] == 1);
1900  }
1901  } else if (!s->left_ref_ctx[row7]) {
1902  c = 4 * (s->above_ref_ctx[col] == 1);
1903  } else {
1904  c = 2 * (s->left_ref_ctx[row7] == 1) +
1905  2 * (s->above_ref_ctx[col] == 1);
1906  }
1907  } else {
1908  if (s->above_intra_ctx[col] ||
1909  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1910  c = 2;
1911  } else if (s->above_comp_ctx[col]) {
1912  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1913  } else {
1914  c = 4 * (s->above_ref_ctx[col] == 1);
1915  }
1916  }
1917  } else if (have_l) {
1918  if (s->left_intra_ctx[row7] ||
1919  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1920  c = 2;
1921  } else if (s->left_comp_ctx[row7]) {
1922  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1923  } else {
1924  c = 4 * (s->left_ref_ctx[row7] == 1);
1925  }
1926  } else {
1927  c = 2;
1928  }
1929  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1930  s->counts.single_ref[c][1][bit]++;
1931  b->ref[0] = 1 + bit;
1932  }
1933  }
1934  }
1935 
1936  if (b->bs <= BS_8x8) {
1937  if (s->segmentation.feat[b->seg_id].skip_enabled) {
1938  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1939  } else {
1940  static const uint8_t off[10] = {
1941  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1942  };
1943 
1944  // FIXME this needs to use the LUT tables from find_ref_mvs
1945  // because not all are -1,0/0,-1
1946  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1947  [s->left_mode_ctx[row7 + off[b->bs]]];
1948 
1949  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1950  s->prob.p.mv_mode[c]);
1951  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1952  s->counts.mv_mode[c][b->mode[0] - 10]++;
1953  }
1954  }
1955 
1956  if (s->filtermode == FILTER_SWITCHABLE) {
1957  int c;
1958 
1959  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1960  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1961  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1962  s->left_filter_ctx[row7] : 3;
1963  } else {
1964  c = s->above_filter_ctx[col];
1965  }
1966  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1967  c = s->left_filter_ctx[row7];
1968  } else {
1969  c = 3;
1970  }
1971 
1972  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1973  s->prob.p.filter[c]);
1974  s->counts.filter[c][filter_id]++;
1975  b->filter = vp9_filter_lut[filter_id];
1976  } else {
1977  b->filter = s->filtermode;
1978  }
1979 
1980  if (b->bs > BS_8x8) {
1981  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1982 
1983  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1984  s->prob.p.mv_mode[c]);
1985  s->counts.mv_mode[c][b->mode[0] - 10]++;
1986  fill_mv(s, b->mv[0], b->mode[0], 0);
1987 
1988  if (b->bs != BS_8x4) {
1989  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1990  s->prob.p.mv_mode[c]);
1991  s->counts.mv_mode[c][b->mode[1] - 10]++;
1992  fill_mv(s, b->mv[1], b->mode[1], 1);
1993  } else {
1994  b->mode[1] = b->mode[0];
1995  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1996  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1997  }
1998 
1999  if (b->bs != BS_4x8) {
2000  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2001  s->prob.p.mv_mode[c]);
2002  s->counts.mv_mode[c][b->mode[2] - 10]++;
2003  fill_mv(s, b->mv[2], b->mode[2], 2);
2004 
2005  if (b->bs != BS_8x4) {
2006  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2007  s->prob.p.mv_mode[c]);
2008  s->counts.mv_mode[c][b->mode[3] - 10]++;
2009  fill_mv(s, b->mv[3], b->mode[3], 3);
2010  } else {
2011  b->mode[3] = b->mode[2];
2012  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2013  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2014  }
2015  } else {
2016  b->mode[2] = b->mode[0];
2017  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2018  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2019  b->mode[3] = b->mode[1];
2020  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2021  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2022  }
2023  } else {
2024  fill_mv(s, b->mv[0], b->mode[0], -1);
2025  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2026  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2027  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2028  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2029  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2030  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2031  }
2032 
2033  vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
2034  }
2035 
2036 #if HAVE_FAST_64BIT
2037 #define SPLAT_CTX(var, val, n) \
2038  switch (n) { \
2039  case 1: var = val; break; \
2040  case 2: AV_WN16A(&var, val * 0x0101); break; \
2041  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2042  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2043  case 16: { \
2044  uint64_t v64 = val * 0x0101010101010101ULL; \
2045  AV_WN64A( &var, v64); \
2046  AV_WN64A(&((uint8_t *) &var)[8], v64); \
2047  break; \
2048  } \
2049  }
2050 #else
2051 #define SPLAT_CTX(var, val, n) \
2052  switch (n) { \
2053  case 1: var = val; break; \
2054  case 2: AV_WN16A(&var, val * 0x0101); break; \
2055  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2056  case 8: { \
2057  uint32_t v32 = val * 0x01010101; \
2058  AV_WN32A( &var, v32); \
2059  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2060  break; \
2061  } \
2062  case 16: { \
2063  uint32_t v32 = val * 0x01010101; \
2064  AV_WN32A( &var, v32); \
2065  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2066  AV_WN32A(&((uint8_t *) &var)[8], v32); \
2067  AV_WN32A(&((uint8_t *) &var)[12], v32); \
2068  break; \
2069  } \
2070  }
2071 #endif
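/* Illustration of the byte-splat trick used by SPLAT_CTX above (a worked
 * example, not decoder logic): multiplying an 8-bit value by 0x01..01
 * replicates it into every byte of a word, so one aligned store updates n
 * context entries at once. With val = 0x2a:
 *
 *     0x2a * 0x0101             == 0x2a2a               (2 entries)
 *     0x2a * 0x01010101         == 0x2a2a2a2a           (4 entries)
 *     0x2a * 0x0101010101010101 == 0x2a2a2a2a2a2a2a2a   (8 entries)
 *
 * The HAVE_FAST_64BIT variant uses 64-bit stores; the fallback writes the
 * same pattern as two or four 32-bit halves. */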
2072 
2073  switch (bwh_tab[1][b->bs][0]) {
2074 #define SET_CTXS(dir, off, n) \
2075  do { \
2076  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2077  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2078  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2079  if (!s->keyframe && !s->intraonly) { \
2080  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2081  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2082  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2083  if (!b->intra) { \
2084  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2085  if (s->filtermode == FILTER_SWITCHABLE) { \
2086  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2087  } \
2088  } \
2089  } \
2090  } while (0)
2091  case 1: SET_CTXS(above, col, 1); break;
2092  case 2: SET_CTXS(above, col, 2); break;
2093  case 4: SET_CTXS(above, col, 4); break;
2094  case 8: SET_CTXS(above, col, 8); break;
2095  }
2096  switch (bwh_tab[1][b->bs][1]) {
2097  case 1: SET_CTXS(left, row7, 1); break;
2098  case 2: SET_CTXS(left, row7, 2); break;
2099  case 4: SET_CTXS(left, row7, 4); break;
2100  case 8: SET_CTXS(left, row7, 8); break;
2101  }
2102 #undef SPLAT_CTX
2103 #undef SET_CTXS
2104 
2105  if (!s->keyframe && !s->intraonly) {
2106  if (b->bs > BS_8x8) {
2107  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2108 
2109  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2110  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2111  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2112  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2113  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2114  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2115  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2116  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2117  } else {
2118  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2119 
2120  for (n = 0; n < w4 * 2; n++) {
2121  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2122  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2123  }
2124  for (n = 0; n < h4 * 2; n++) {
2125  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2126  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2127  }
2128  }
2129  }
2130 
2131  // FIXME kinda ugly
2132  for (y = 0; y < h4; y++) {
2133  int x, o = (row + y) * s->sb_cols * 8 + col;
2134  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2135 
2136  if (b->intra) {
2137  for (x = 0; x < w4; x++) {
2138  mv[x].ref[0] =
2139  mv[x].ref[1] = -1;
2140  }
2141  } else if (b->comp) {
2142  for (x = 0; x < w4; x++) {
2143  mv[x].ref[0] = b->ref[0];
2144  mv[x].ref[1] = b->ref[1];
2145  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2146  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2147  }
2148  } else {
2149  for (x = 0; x < w4; x++) {
2150  mv[x].ref[0] = b->ref[0];
2151  mv[x].ref[1] = -1;
2152  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2153  }
2154  }
2155  }
2156 }
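/* Note on the loop above: one VP9mvrefPair is written per 4x4 unit into the
 * frame-wide s->frames[CUR_FRAME].mv[] grid (stride s->sb_cols * 8, i.e. one
 * entry per 4x4 column of the padded sb64 rows). Always storing b->mv[3],
 * the bottom-right sub-block vector, presumably gives later temporal MV
 * prediction a single representative vector per block. */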
2157 
2158 // FIXME merge cnt/eob arguments?
2159 static av_always_inline int
2160 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2161  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2162  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2163  int nnz, const int16_t *scan, const int16_t (*nb)[2],
2164  const int16_t *band_counts, const int16_t *qmul)
2165 {
2166  int i = 0, band = 0, band_left = band_counts[band];
2167  uint8_t *tp = p[0][nnz];
2168  uint8_t cache[1024];
2169 
2170  do {
2171  int val, rc;
2172 
2173  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2174  eob[band][nnz][val]++;
2175  if (!val)
2176  break;
2177 
2178  skip_eob:
2179  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2180  cnt[band][nnz][0]++;
2181  if (!--band_left)
2182  band_left = band_counts[++band];
2183  cache[scan[i]] = 0;
2184  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2185  tp = p[band][nnz];
2186  if (++i == n_coeffs)
2187  break; // invalid input; blocks should end with EOB
2188  goto skip_eob;
2189  }
2190 
2191  rc = scan[i];
2192  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2193  cnt[band][nnz][1]++;
2194  val = 1;
2195  cache[rc] = 1;
2196  } else {
2197  // fill in p[3-10] (model fill) - only once per frame for each pos
2198  if (!tp[3])
2199  memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2200 
2201  cnt[band][nnz][2]++;
2202  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2203  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2204  cache[rc] = val = 2;
2205  } else {
2206  val = 3 + vp56_rac_get_prob(c, tp[5]);
2207  cache[rc] = 3;
2208  }
2209  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2210  cache[rc] = 4;
2211  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2212  val = 5 + vp56_rac_get_prob(c, 159);
2213  } else {
2214  val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2215  val += vp56_rac_get_prob(c, 145);
2216  }
2217  } else { // cat 3-6
2218  cache[rc] = 5;
2219  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2220  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2221  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2222  val += (vp56_rac_get_prob(c, 148) << 1);
2223  val += vp56_rac_get_prob(c, 140);
2224  } else {
2225  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2226  val += (vp56_rac_get_prob(c, 155) << 2);
2227  val += (vp56_rac_get_prob(c, 140) << 1);
2228  val += vp56_rac_get_prob(c, 135);
2229  }
2230  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2231  val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2232  val += (vp56_rac_get_prob(c, 157) << 3);
2233  val += (vp56_rac_get_prob(c, 141) << 2);
2234  val += (vp56_rac_get_prob(c, 134) << 1);
2235  val += vp56_rac_get_prob(c, 130);
2236  } else {
2237  val = 67;
2238  if (!is8bitsperpixel) {
2239  if (bpp == 12) {
2240  val += vp56_rac_get_prob(c, 255) << 17;
2241  val += vp56_rac_get_prob(c, 255) << 16;
2242  }
2243  val += (vp56_rac_get_prob(c, 255) << 15);
2244  val += (vp56_rac_get_prob(c, 255) << 14);
2245  }
2246  val += (vp56_rac_get_prob(c, 254) << 13);
2247  val += (vp56_rac_get_prob(c, 254) << 12);
2248  val += (vp56_rac_get_prob(c, 254) << 11);
2249  val += (vp56_rac_get_prob(c, 252) << 10);
2250  val += (vp56_rac_get_prob(c, 249) << 9);
2251  val += (vp56_rac_get_prob(c, 243) << 8);
2252  val += (vp56_rac_get_prob(c, 230) << 7);
2253  val += (vp56_rac_get_prob(c, 196) << 6);
2254  val += (vp56_rac_get_prob(c, 177) << 5);
2255  val += (vp56_rac_get_prob(c, 153) << 4);
2256  val += (vp56_rac_get_prob(c, 140) << 3);
2257  val += (vp56_rac_get_prob(c, 133) << 2);
2258  val += (vp56_rac_get_prob(c, 130) << 1);
2259  val += vp56_rac_get_prob(c, 129);
2260  }
2261  }
2262  }
2263 #define STORE_COEF(c, i, v) do { \
2264  if (is8bitsperpixel) { \
2265  c[i] = v; \
2266  } else { \
2267  AV_WN32A(&c[i * 2], v); \
2268  } \
2269 } while (0)
2270  if (!--band_left)
2271  band_left = band_counts[++band];
2272  if (is_tx32x32)
2273  STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2274  else
2275  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2276  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2277  tp = p[band][nnz];
2278  } while (++i < n_coeffs);
2279 
2280  return i;
2281 }
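/* Worked summary of the token values decoded above: after the literal values
 * 1 to 4, each category covers a fixed range via extra probability-coded bits:
 *
 *     cat1:  5..6   (1 bit)      cat4: 19..34  (4 bits)
 *     cat2:  7..10  (2 bits)     cat5: 35..66  (5 bits)
 *     cat3: 11..18  (3 bits)     cat6: 67 plus 14 bits at 8 bpp
 *                                      (16 at 10 bpp, 18 at 12 bpp)
 *
 * e.g. cat3 starts at 11 and adds bits weighted 4/2/1 (probs 173/148/140),
 * giving 11 + 0..7 = 11..18, matching the branches above. */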
2282 
2283 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2284  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2285  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2286  const int16_t (*nb)[2], const int16_t *band_counts,
2287  const int16_t *qmul)
2288 {
2289  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2290  nnz, scan, nb, band_counts, qmul);
2291 }
2292 
2293 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2294  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2295  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2296  const int16_t (*nb)[2], const int16_t *band_counts,
2297  const int16_t *qmul)
2298 {
2299  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2300  nnz, scan, nb, band_counts, qmul);
2301 }
2302 
2303 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2304  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2305  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2306  const int16_t (*nb)[2], const int16_t *band_counts,
2307  const int16_t *qmul)
2308 {
2309  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2310  nnz, scan, nb, band_counts, qmul);
2311 }
2312 
2313 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2314  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2315  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2316  const int16_t (*nb)[2], const int16_t *band_counts,
2317  const int16_t *qmul)
2318 {
2319  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2320  nnz, scan, nb, band_counts, qmul);
2321 }
2322 
2323 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2324 {
2325  VP9Context *s = ctx->priv_data;
2326  VP9Block *b = s->b;
2327  int row = s->row, col = s->col;
2328  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2329  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2330  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2331  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2332  int end_x = FFMIN(2 * (s->cols - col), w4);
2333  int end_y = FFMIN(2 * (s->rows - row), h4);
2334  int n, pl, x, y, res;
2335  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2336  int tx = 4 * s->lossless + b->tx;
2337  const int16_t * const *yscans = vp9_scans[tx];
2338  const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2339  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2340  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2341  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2342  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2343  static const int16_t band_counts[4][8] = {
2344  { 1, 2, 3, 4, 3, 16 - 13 },
2345  { 1, 2, 3, 4, 11, 64 - 21 },
2346  { 1, 2, 3, 4, 11, 256 - 21 },
2347  { 1, 2, 3, 4, 11, 1024 - 21 },
2348  };
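/* each band_counts[] row sums to the coefficient count of its transform size:
 * 1+2+3+4+3+3 = 16 for TX_4X4, 1+2+3+4+11+43 = 64 for TX_8X8, and so on up
 * to 1024 for TX_32X32; the last entry is spelled "size - n" to keep that
 * invariant visible. */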
2349  const int16_t *y_band_counts = band_counts[b->tx];
2350  const int16_t *uv_band_counts = band_counts[b->uvtx];
2351  int bytesperpixel = is8bitsperpixel ? 1 : 2;
2352  int total_coeff = 0;
2353 
2354 #define MERGE(la, end, step, rd) \
2355  for (n = 0; n < end; n += step) \
2356  la[n] = !!rd(&la[n])
2357 #define MERGE_CTX(step, rd) \
2358  do { \
2359  MERGE(l, end_y, step, rd); \
2360  MERGE(a, end_x, step, rd); \
2361  } while (0)
2362 
2363 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2364  for (n = 0, y = 0; y < end_y; y += step) { \
2365  for (x = 0; x < end_x; x += step, n += step * step) { \
2366  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2367  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2368  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2369  c, e, p, a[x] + l[y], yscans[txtp], \
2370  ynbs[txtp], y_band_counts, qmul[0]); \
2371  a[x] = l[y] = !!res; \
2372  total_coeff |= !!res; \
2373  if (step >= 4) { \
2374  AV_WN16A(&s->eob[n], res); \
2375  } else { \
2376  s->eob[n] = res; \
2377  } \
2378  } \
2379  }
2380 
2381 #define SPLAT(la, end, step, cond) \
2382  if (step == 2) { \
2383  for (n = 1; n < end; n += step) \
2384  la[n] = la[n - 1]; \
2385  } else if (step == 4) { \
2386  if (cond) { \
2387  for (n = 0; n < end; n += step) \
2388  AV_WN32A(&la[n], la[n] * 0x01010101); \
2389  } else { \
2390  for (n = 0; n < end; n += step) \
2391  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2392  } \
2393  } else /* step == 8 */ { \
2394  if (cond) { \
2395  if (HAVE_FAST_64BIT) { \
2396  for (n = 0; n < end; n += step) \
2397  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2398  } else { \
2399  for (n = 0; n < end; n += step) { \
2400  uint32_t v32 = la[n] * 0x01010101; \
2401  AV_WN32A(&la[n], v32); \
2402  AV_WN32A(&la[n + 4], v32); \
2403  } \
2404  } \
2405  } else { \
2406  for (n = 0; n < end; n += step) \
2407  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2408  } \
2409  }
2410 #define SPLAT_CTX(step) \
2411  do { \
2412  SPLAT(a, end_x, step, end_x == w4); \
2413  SPLAT(l, end_y, step, end_y == h4); \
2414  } while (0)
2415 
2416  /* y tokens */
2417  switch (b->tx) {
2418  case TX_4X4:
2419  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2420  break;
2421  case TX_8X8:
2422  MERGE_CTX(2, AV_RN16A);
2423  DECODE_Y_COEF_LOOP(2, 0,);
2424  SPLAT_CTX(2);
2425  break;
2426  case TX_16X16:
2427  MERGE_CTX(4, AV_RN32A);
2428  DECODE_Y_COEF_LOOP(4, 0,);
2429  SPLAT_CTX(4);
2430  break;
2431  case TX_32X32:
2432  MERGE_CTX(8, AV_RN64A);
2433  DECODE_Y_COEF_LOOP(8, 0, 32);
2434  SPLAT_CTX(8);
2435  break;
2436  }
2437 
2438 #define DECODE_UV_COEF_LOOP(step, v) \
2439  for (n = 0, y = 0; y < end_y; y += step) { \
2440  for (x = 0; x < end_x; x += step, n += step * step) { \
2441  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2442  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2443  16 * step * step, c, e, p, a[x] + l[y], \
2444  uvscan, uvnb, uv_band_counts, qmul[1]); \
2445  a[x] = l[y] = !!res; \
2446  total_coeff |= !!res; \
2447  if (step >= 4) { \
2448  AV_WN16A(&s->uveob[pl][n], res); \
2449  } else { \
2450  s->uveob[pl][n] = res; \
2451  } \
2452  } \
2453  }
2454 
2455  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2456  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2457  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2458  w4 >>= s->ss_h;
2459  end_x >>= s->ss_h;
2460  h4 >>= s->ss_v;
2461  end_y >>= s->ss_v;
2462  for (pl = 0; pl < 2; pl++) {
2463  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2464  l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2465  switch (b->uvtx) {
2466  case TX_4X4:
2467  DECODE_UV_COEF_LOOP(1,);
2468  break;
2469  case TX_8X8:
2470  MERGE_CTX(2, AV_RN16A);
2471  DECODE_UV_COEF_LOOP(2,);
2472  SPLAT_CTX(2);
2473  break;
2474  case TX_16X16:
2475  MERGE_CTX(4, AV_RN32A);
2476  DECODE_UV_COEF_LOOP(4,);
2477  SPLAT_CTX(4);
2478  break;
2479  case TX_32X32:
2480  MERGE_CTX(8, AV_RN64A);
2481  DECODE_UV_COEF_LOOP(8, 32);
2482  SPLAT_CTX(8);
2483  break;
2484  }
2485  }
2486 
2487  return total_coeff;
2488 }
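/* Sketch of the context handling above: the nnz arrays a[]/l[] stay at 4x4
 * granularity while tokens are decoded per transform block. MERGE_CTX first
 * collapses the covered entries into one flag at the block origin (a[x]), and
 * SPLAT_CTX afterwards copies that flag back across all covered entries, so
 * for TX_16X16 the four bytes a[x] through a[x + 3] all end up holding !!res. */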
2489 
2490 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2491 {
2492  return decode_coeffs(ctx, 1);
2493 }
2494 
2495 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2496 {
2497  return decode_coeffs(ctx, 0);
2498 }
2499 
2500 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2501  uint8_t *dst_edge, ptrdiff_t stride_edge,
2502  uint8_t *dst_inner, ptrdiff_t stride_inner,
2503  uint8_t *l, int col, int x, int w,
2504  int row, int y, enum TxfmMode tx,
2505  int p, int ss_h, int ss_v, int bytesperpixel)
2506 {
2507  int have_top = row > 0 || y > 0;
2508  int have_left = col > s->tiling.tile_col_start || x > 0;
2509  int have_right = x < w - 1;
2510  int bpp = s->bpp;
2511  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2512  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2513  { DC_127_PRED, VERT_PRED } },
2514  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2515  { HOR_PRED, HOR_PRED } },
2516  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2517  { LEFT_DC_PRED, DC_PRED } },
2518  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2519  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2520  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2521  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2522  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2523  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2524  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2525  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2526  [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2527  { DC_127_PRED, VERT_LEFT_PRED } },
2528  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2529  { HOR_UP_PRED, HOR_UP_PRED } },
2530  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2531  { HOR_PRED, TM_VP8_PRED } },
2532  };
2533  static const struct {
2534  uint8_t needs_left:1;
2535  uint8_t needs_top:1;
2536  uint8_t needs_topleft:1;
2537  uint8_t needs_topright:1;
2538  uint8_t invert_left:1;
2539  } edges[N_INTRA_PRED_MODES] = {
2540  [VERT_PRED] = { .needs_top = 1 },
2541  [HOR_PRED] = { .needs_left = 1 },
2542  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2543  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2544  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2545  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2546  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2547  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2548  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2549  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2550  [LEFT_DC_PRED] = { .needs_left = 1 },
2551  [TOP_DC_PRED] = { .needs_top = 1 },
2552  [DC_128_PRED] = { 0 },
2553  [DC_127_PRED] = { 0 },
2554  [DC_129_PRED] = { 0 }
2555  };
2556 
2557  av_assert2(mode >= 0 && mode < 10);
2558  mode = mode_conv[mode][have_left][have_top];
2559  if (edges[mode].needs_top) {
2560  uint8_t *top, *topleft;
2561  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2562  int n_px_need_tr = 0;
2563 
2564  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2565  n_px_need_tr = 4;
2566 
2567  // if top of sb64-row, use s->intra_pred_data[] instead of
2568  // dst[-stride] for intra prediction (it contains pre- instead of
2569  // post-loopfilter data)
2570  if (have_top) {
2571  top = !(row & 7) && !y ?
2572  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2573  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2574  if (have_left)
2575  topleft = !(row & 7) && !y ?
2576  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2577  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2578  &dst_inner[-stride_inner];
2579  }
2580 
2581  if (have_top &&
2582  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2583  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2584  n_px_need + n_px_need_tr <= n_px_have) {
2585  *a = top;
2586  } else {
2587  if (have_top) {
2588  if (n_px_need <= n_px_have) {
2589  memcpy(*a, top, n_px_need * bytesperpixel);
2590  } else {
2591 #define memset_bpp(c, i1, v, i2, num) do { \
2592  if (bytesperpixel == 1) { \
2593  memset(&(c)[(i1)], (v)[(i2)], (num)); \
2594  } else { \
2595  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2596  for (n = 0; n < (num); n++) { \
2597  AV_WN16A(&(c)[((i1) + n) * 2], val); \
2598  } \
2599  } \
2600 } while (0)
2601  memcpy(*a, top, n_px_have * bytesperpixel);
2602  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2603  }
2604  } else {
2605 #define memset_val(c, val, num) do { \
2606  if (bytesperpixel == 1) { \
2607  memset((c), (val), (num)); \
2608  } else { \
2609  int n; \
2610  for (n = 0; n < (num); n++) { \
2611  AV_WN16A(&(c)[n * 2], (val)); \
2612  } \
2613  } \
2614 } while (0)
2615  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2616  }
2617  if (edges[mode].needs_topleft) {
2618  if (have_left && have_top) {
2619 #define assign_bpp(c, i1, v, i2) do { \
2620  if (bytesperpixel == 1) { \
2621  (c)[(i1)] = (v)[(i2)]; \
2622  } else { \
2623  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2624  } \
2625 } while (0)
2626  assign_bpp(*a, -1, topleft, -1);
2627  } else {
2628 #define assign_val(c, i, v) do { \
2629  if (bytesperpixel == 1) { \
2630  (c)[(i)] = (v); \
2631  } else { \
2632  AV_WN16A(&(c)[(i) * 2], (v)); \
2633  } \
2634 } while (0)
2635  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2636  }
2637  }
2638  if (tx == TX_4X4 && edges[mode].needs_topright) {
2639  if (have_top && have_right &&
2640  n_px_need + n_px_need_tr <= n_px_have) {
2641  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2642  } else {
2643  memset_bpp(*a, 4, *a, 3, 4);
2644  }
2645  }
2646  }
2647  }
2648  if (edges[mode].needs_left) {
2649  if (have_left) {
2650  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2651  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2652  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2653 
2654  if (edges[mode].invert_left) {
2655  if (n_px_need <= n_px_have) {
2656  for (i = 0; i < n_px_need; i++)
2657  assign_bpp(l, i, &dst[i * stride], -1);
2658  } else {
2659  for (i = 0; i < n_px_have; i++)
2660  assign_bpp(l, i, &dst[i * stride], -1);
2661  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2662  }
2663  } else {
2664  if (n_px_need <= n_px_have) {
2665  for (i = 0; i < n_px_need; i++)
2666  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2667  } else {
2668  for (i = 0; i < n_px_have; i++)
2669  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2670  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2671  }
2672  }
2673  } else {
2674  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2675  }
2676  }
2677 
2678  return mode;
2679 }
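/* usage sketch for the above: with DC_PRED, have_top set and have_left
 * unset, mode_conv[] rewrites the mode to TOP_DC_PRED, whose edges[] entry
 * sets only needs_top; so only the top edge is gathered (from
 * s->intra_pred_data[] at sb64-row tops, otherwise from dst[-stride]) before
 * the caller invokes s->dsp.intra_pred[tx][mode]. */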
2680 
2681 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2682  ptrdiff_t uv_off, int bytesperpixel)
2683 {
2684  VP9Context *s = ctx->priv_data;
2685  VP9Block *b = s->b;
2686  int row = s->row, col = s->col;
2687  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2688  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2689  int end_x = FFMIN(2 * (s->cols - col), w4);
2690  int end_y = FFMIN(2 * (s->rows - row), h4);
2691  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2692  int uvstep1d = 1 << b->uvtx, p;
2693  uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2694  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2695  LOCAL_ALIGNED_32(uint8_t, l, [64]);
2696 
2697  for (n = 0, y = 0; y < end_y; y += step1d) {
2698  uint8_t *ptr = dst, *ptr_r = dst_r;
2699  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2700  ptr_r += 4 * step1d * bytesperpixel, n += step) {
2701  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2702  y * 2 + x : 0];
2703  uint8_t *a = &a_buf[32];
2704  enum TxfmType txtp = vp9_intra_txfm_type[mode];
2705  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2706 
2707  mode = check_intra_mode(s, mode, &a, ptr_r,
2708  s->frames[CUR_FRAME].tf.f->linesize[0],
2709  ptr, s->y_stride, l,
2710  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2711  s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2712  if (eob)
2713  s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2714  s->block + 16 * n * bytesperpixel, eob);
2715  }
2716  dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2717  dst += 4 * step1d * s->y_stride;
2718  }
2719 
2720  // U/V
2721  w4 >>= s->ss_h;
2722  end_x >>= s->ss_h;
2723  end_y >>= s->ss_v;
2724  step = 1 << (b->uvtx * 2);
2725  for (p = 0; p < 2; p++) {
2726  dst = s->dst[1 + p];
2727  dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2728  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2729  uint8_t *ptr = dst, *ptr_r = dst_r;
2730  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2731  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2732  int mode = b->uvmode;
2733  uint8_t *a = &a_buf[32];
2734  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2735 
2736  mode = check_intra_mode(s, mode, &a, ptr_r,
2737  s->frames[CUR_FRAME].tf.f->linesize[1],
2738  ptr, s->uv_stride, l, col, x, w4, row, y,
2739  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2740  s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2741  if (eob)
2742  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2743  s->uvblock[p] + 16 * n * bytesperpixel, eob);
2744  }
2745  dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2746  dst += 4 * uvstep1d * s->uv_stride;
2747  }
2748  }
2749 }
2750 
2751 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2752 {
2753  intra_recon(ctx, y_off, uv_off, 1);
2754 }
2755 
2756 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2757 {
2758  intra_recon(ctx, y_off, uv_off, 2);
2759 }
2760 
2761 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2762  uint8_t *dst, ptrdiff_t dst_stride,
2763  const uint8_t *ref, ptrdiff_t ref_stride,
2764  ThreadFrame *ref_frame,
2765  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2766  int px, int py, int pw, int ph,
2767  int bw, int bh, int w, int h, int bytesperpixel,
2768  const uint16_t *scale, const uint8_t *step)
2769 {
2770 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2771  int mx, my;
2772  int refbw_m1, refbh_m1;
2773  int th;
2774  VP56mv mv;
2775 
2776  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2777  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2778  // BUG libvpx seems to scale the two components separately. This introduces
2779  // rounding errors but we have to reproduce them to be exactly compatible
2780  // with the output from libvpx...
2781  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2782  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2783 
2784  y = my >> 4;
2785  x = mx >> 4;
2786  ref += y * ref_stride + x * bytesperpixel;
2787  mx &= 15;
2788  my &= 15;
2789  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2790  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2791  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2792  // we use +7 because the last 7 pixels of each sbrow can be changed in
2793  // the longest loopfilter of the next sbrow
2794  th = (y + refbh_m1 + 4 + 7) >> 6;
2795  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2796  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2797  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2798  ref - 3 * ref_stride - 3 * bytesperpixel,
2799  288, ref_stride,
2800  refbw_m1 + 8, refbh_m1 + 8,
2801  x - 3, y - 3, w, h);
2802  ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2803  ref_stride = 288;
2804  }
2805  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2806 }
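/* scale_mv() works in 14-bit fixed point; s->mvscale[] is assumed to hold
 * (ref_dim << 14) / cur_dim per axis, so (n * scale) >> 14 maps coordinates
 * and vectors into the reference frame. Worked example: a 1280-wide reference
 * under a 1920-wide frame gives scale = (1280 << 14) / 1920 = 10922, and
 * luma column x = 64 maps to (64 * 16 * 10922) >> 14 = 682 in 1/16-pel
 * units, i.e. about 42.6 reference pixels. */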
2807 
2808 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2809  uint8_t *dst_u, uint8_t *dst_v,
2810  ptrdiff_t dst_stride,
2811  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2812  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2813  ThreadFrame *ref_frame,
2814  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2815  int px, int py, int pw, int ph,
2816  int bw, int bh, int w, int h, int bytesperpixel,
2817  const uint16_t *scale, const uint8_t *step)
2818 {
2819  int mx, my;
2820  int refbw_m1, refbh_m1;
2821  int th;
2822  VP56mv mv;
2823 
2824  if (s->ss_h) {
2825  // BUG https://code.google.com/p/webm/issues/detail?id=820
2826  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2827  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2828  } else {
2829  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2830  mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2831  }
2832  if (s->ss_v) {
2833  // BUG https://code.google.com/p/webm/issues/detail?id=820
2834  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2835  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2836  } else {
2837  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2838  my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2839  }
2840 #undef scale_mv
2841  y = my >> 4;
2842  x = mx >> 4;
2843  ref_u += y * src_stride_u + x * bytesperpixel;
2844  ref_v += y * src_stride_v + x * bytesperpixel;
2845  mx &= 15;
2846  my &= 15;
2847  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2848  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2849  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2850  // we use +7 because the last 7 pixels of each sbrow can be changed in
2851  // the longest loopfilter of the next sbrow
2852  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2853  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2854  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2855  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2856  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2857  288, src_stride_u,
2858  refbw_m1 + 8, refbh_m1 + 8,
2859  x - 3, y - 3, w, h);
2860  ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2861  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2862 
2863  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2864  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2865  288, src_stride_v,
2866  refbw_m1 + 8, refbh_m1 + 8,
2867  x - 3, y - 3, w, h);
2868  ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2869  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2870  } else {
2871  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2872  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2873  }
2874 }
2875 
2876 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2877  px, py, pw, ph, bw, bh, w, h, i) \
2878  mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2879  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2880  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2881 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2882  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2883  mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2884  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2885  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2886 #define SCALED 1
2887 #define FN(x) x##_scaled_8bpp
2888 #define BYTES_PER_PIXEL 1
2889 #include "vp9_mc_template.c"
2890 #undef FN
2891 #undef BYTES_PER_PIXEL
2892 #define FN(x) x##_scaled_16bpp
2893 #define BYTES_PER_PIXEL 2
2894 #include "vp9_mc_template.c"
2895 #undef mc_luma_dir
2896 #undef mc_chroma_dir
2897 #undef FN
2898 #undef BYTES_PER_PIXEL
2899 #undef SCALED
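/* The #include pattern above is C's poor-man's templating: vp9_mc_template.c
 * is compiled once per configuration, with the FN(), BYTES_PER_PIXEL and
 * SCALED macros selecting function names and pixel size, yielding e.g.
 * inter_pred_scaled_8bpp() and inter_pred_scaled_16bpp() from a single body.
 * A minimal sketch of the idiom, with hypothetical names:
 *
 *     // template.inc:  static void FN(copy_row)(uint8_t *d, const uint8_t *s,
 *     //                                         int w)
 *     //                { memcpy(d, s, w * BYTES_PER_PIXEL); }
 *     // user:          #define FN(x)           x##_8bpp
 *     //                #define BYTES_PER_PIXEL 1
 *     //                #include "template.inc"   // defines copy_row_8bpp()
 */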
2900 
2901 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2902  uint8_t *dst, ptrdiff_t dst_stride,
2903  const uint8_t *ref, ptrdiff_t ref_stride,
2904  ThreadFrame *ref_frame,
2905  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2906  int bw, int bh, int w, int h, int bytesperpixel)
2907 {
2908  int mx = mv->x, my = mv->y, th;
2909 
2910  y += my >> 3;
2911  x += mx >> 3;
2912  ref += y * ref_stride + x * bytesperpixel;
2913  mx &= 7;
2914  my &= 7;
2915  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2916  // we use +7 because the last 7 pixels of each sbrow can be changed in
2917  // the longest loopfilter of the next sbrow
2918  th = (y + bh + 4 * !!my + 7) >> 6;
2919  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2920  if (x < !!mx * 3 || y < !!my * 3 ||
2921  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2922  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2923  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2924  160, ref_stride,
2925  bw + !!mx * 7, bh + !!my * 7,
2926  x - !!mx * 3, y - !!my * 3, w, h);
2927  ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2928  ref_stride = 160;
2929  }
2930  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2931 }
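/* worked example for the progress wait above: with y = 120, bh = 16 and a
 * subpel my, th = (120 + 16 + 4 + 7) >> 6 = 2; prediction may read down to
 * luma row 139 (16 block rows plus 4 subpel taps below), and the reference's
 * loopfilter may still rewrite 7 more rows, so we wait until the reference
 * thread has completed its sb64 row index 2. */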
2932 
2933 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2934  uint8_t *dst_u, uint8_t *dst_v,
2935  ptrdiff_t dst_stride,
2936  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2937  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2938  ThreadFrame *ref_frame,
2939  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2940  int bw, int bh, int w, int h, int bytesperpixel)
2941 {
2942  int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2943 
2944  y += my >> 4;
2945  x += mx >> 4;
2946  ref_u += y * src_stride_u + x * bytesperpixel;
2947  ref_v += y * src_stride_v + x * bytesperpixel;
2948  mx &= 15;
2949  my &= 15;
2950  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2951  // we use +7 because the last 7 pixels of each sbrow can be changed in
2952  // the longest loopfilter of the next sbrow
2953  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2954  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2955  if (x < !!mx * 3 || y < !!my * 3 ||
2956  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2957  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2958  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2959  160, src_stride_u,
2960  bw + !!mx * 7, bh + !!my * 7,
2961  x - !!mx * 3, y - !!my * 3, w, h);
2962  ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2963  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2964 
2965  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2966  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2967  160, src_stride_v,
2968  bw + !!mx * 7, bh + !!my * 7,
2969  x - !!mx * 3, y - !!my * 3, w, h);
2970  ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2971  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2972  } else {
2973  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2974  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2975  }
2976 }
2977 
2978 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2979  px, py, pw, ph, bw, bh, w, h, i) \
2980  mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2981  mv, bw, bh, w, h, bytesperpixel)
2982 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2983  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2984  mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2985  row, col, mv, bw, bh, w, h, bytesperpixel)
2986 #define SCALED 0
2987 #define FN(x) x##_8bpp
2988 #define BYTES_PER_PIXEL 1
2989 #include "vp9_mc_template.c"
2990 #undef FN
2991 #undef BYTES_PER_PIXEL
2992 #define FN(x) x##_16bpp
2993 #define BYTES_PER_PIXEL 2
2994 #include "vp9_mc_template.c"
2995 #undef mc_luma_dir
2996 #undef mc_chroma_dir
2997 #undef FN
2998 #undef BYTES_PER_PIXEL
2999 #undef SCALED
3000 
3001 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3002 {
3003  VP9Context *s = ctx->priv_data;
3004  VP9Block *b = s->b;
3005  int row = s->row, col = s->col;
3006 
3007  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3008  if (bytesperpixel == 1) {
3009  inter_pred_scaled_8bpp(ctx);
3010  } else {
3011  inter_pred_scaled_16bpp(ctx);
3012  }
3013  } else {
3014  if (bytesperpixel == 1) {
3015  inter_pred_8bpp(ctx);
3016  } else {
3017  inter_pred_16bpp(ctx);
3018  }
3019  }
3020  if (!b->skip) {
3021  /* mostly copied intra_recon() */
3022 
3023  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3024  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3025  int end_x = FFMIN(2 * (s->cols - col), w4);
3026  int end_y = FFMIN(2 * (s->rows - row), h4);
3027  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3028  int uvstep1d = 1 << b->uvtx, p;
3029  uint8_t *dst = s->dst[0];
3030 
3031  // y itxfm add
3032  for (n = 0, y = 0; y < end_y; y += step1d) {
3033  uint8_t *ptr = dst;
3034  for (x = 0; x < end_x; x += step1d,
3035  ptr += 4 * step1d * bytesperpixel, n += step) {
3036  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3037 
3038  if (eob)
3039  s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3040  s->block + 16 * n * bytesperpixel, eob);
3041  }
3042  dst += 4 * s->y_stride * step1d;
3043  }
3044 
3045  // uv itxfm add
3046  end_x >>= s->ss_h;
3047  end_y >>= s->ss_v;
3048  step = 1 << (b->uvtx * 2);
3049  for (p = 0; p < 2; p++) {
3050  dst = s->dst[p + 1];
3051  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3052  uint8_t *ptr = dst;
3053  for (x = 0; x < end_x; x += uvstep1d,
3054  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3055  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3056 
3057  if (eob)
3058  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3059  s->uvblock[p] + 16 * n * bytesperpixel, eob);
3060  }
3061  dst += 4 * uvstep1d * s->uv_stride;
3062  }
3063  }
3064  }
3065 }
3066 
3067 static void inter_recon_8bpp(AVCodecContext *ctx)
3068 {
3069  inter_recon(ctx, 1);
3070 }
3071 
3072 static void inter_recon_16bpp(AVCodecContext *ctx)
3073 {
3074  inter_recon(ctx, 2);
3075 }
3076 
3077 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3078  int row_and_7, int col_and_7,
3079  int w, int h, int col_end, int row_end,
3080  enum TxfmMode tx, int skip_inter)
3081 {
3082  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3083  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3084 
3085  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3086  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3087  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3088  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3089 
3090  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3091  // edges. This means that for UV, we work on two subsampled blocks at
3092  // a time, and we only use the topleft block's mode information to set
3093  // things like block strength. Thus, for any block size smaller than
3094  // 16x16, ignore the odd portion of the block.
3095  if (tx == TX_4X4 && (ss_v | ss_h)) {
3096  if (h == ss_v) {
3097  if (row_and_7 & 1)
3098  return;
3099  if (!row_end)
3100  h += 1;
3101  }
3102  if (w == ss_h) {
3103  if (col_and_7 & 1)
3104  return;
3105  if (!col_end)
3106  w += 1;
3107  }
3108  }
3109 
3110  if (tx == TX_4X4 && !skip_inter) {
3111  int t = 1 << col_and_7, m_col = (t << w) - t, y;
3112  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3113  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3114 
3115  for (y = row_and_7; y < h + row_and_7; y++) {
3116  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3117 
3118  mask[0][y][1] |= m_row_8;
3119  mask[0][y][2] |= m_row_4;
3120  // for odd lines, if the odd col is not being filtered,
3121  // skip odd row also:
3122  // .---. <-- a
3123  // | |
3124  // |___| <-- b
3125  // ^ ^
3126  // c d
3127  //
3128  // if a/c are even row/col and b/d are odd, and d is skipped,
3129  // e.g. right edge of size-66x66.webm, then skip b also (bug)
3130  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3131  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3132  } else {
3133  mask[1][y][col_mask_id] |= m_col;
3134  }
3135  if (!ss_h)
3136  mask[0][y][3] |= m_col;
3137  if (!ss_v) {
3138  if (ss_h && (col_end & 1))
3139  mask[1][y][3] |= (t << (w - 1)) - t;
3140  else
3141  mask[1][y][3] |= m_col;
3142  }
3143  }
3144  } else {
3145  int y, t = 1 << col_and_7, m_col = (t << w) - t;
3146 
3147  if (!skip_inter) {
3148  int mask_id = (tx == TX_8X8);
3149  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3150  int l2 = tx + ss_h - 1, step1d;
3151  int m_row = m_col & masks[l2];
3152 
3153  // at odd UV col/row tx16/tx32 loopfilter edges, force the
3154  // 8-wide loopfilter to prevent it from going off the visible edge.
3155  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3156  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3157  int m_row_8 = m_row - m_row_16;
3158 
3159  for (y = row_and_7; y < h + row_and_7; y++) {
3160  mask[0][y][0] |= m_row_16;
3161  mask[0][y][1] |= m_row_8;
3162  }
3163  } else {
3164  for (y = row_and_7; y < h + row_and_7; y++)
3165  mask[0][y][mask_id] |= m_row;
3166  }
3167 
3168  l2 = tx + ss_v - 1;
3169  step1d = 1 << l2;
3170  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3171  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3172  mask[1][y][0] |= m_col;
3173  if (y - row_and_7 == h - 1)
3174  mask[1][y][1] |= m_col;
3175  } else {
3176  for (y = row_and_7; y < h + row_and_7; y += step1d)
3177  mask[1][y][mask_id] |= m_col;
3178  }
3179  } else if (tx != TX_4X4) {
3180  int mask_id;
3181 
3182  mask_id = (tx == TX_8X8) || (h == ss_v);
3183  mask[1][row_and_7][mask_id] |= m_col;
3184  mask_id = (tx == TX_8X8) || (w == ss_h);
3185  for (y = row_and_7; y < h + row_and_7; y++)
3186  mask[0][y][mask_id] |= t;
3187  } else {
3188  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3189 
3190  for (y = row_and_7; y < h + row_and_7; y++) {
3191  mask[0][y][2] |= t4;
3192  mask[0][y][1] |= t8;
3193  }
3194  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3195  }
3196  }
3197 }
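/* worked example of the bit layout above: t = 1 << col_and_7 marks the
 * block's first 8px column within the sb64, and m_col = (t << w) - t is a
 * contiguous run of one bit per covered column; e.g. col_and_7 = 2, w = 2
 * gives t = 0x04 and m_col = (0x04 << 2) - 0x04 = 0x0c, i.e. columns 2 and 3.
 * ANDing with wide_filter_col_mask[] (0x11 / 0x01) then keeps only the
 * 32px-aligned columns, which per the comments above get the wider filter. */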
3198 
3199 static void decode_b(AVCodecContext *ctx, int row, int col,
3200  struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3201  enum BlockLevel bl, enum BlockPartition bp)
3202 {
3203  VP9Context *s = ctx->priv_data;
3204  VP9Block *b = s->b;
3205  enum BlockSize bs = bl * 3 + bp;
3206  int bytesperpixel = s->bytesperpixel;
3207  int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3208  int emu[2];
3209  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3210 
3211  s->row = row;
3212  s->row7 = row & 7;
3213  s->col = col;
3214  s->col7 = col & 7;
3215  s->min_mv.x = -(128 + col * 64);
3216  s->min_mv.y = -(128 + row * 64);
3217  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3218  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3219  if (s->pass < 2) {
3220  b->bs = bs;
3221  b->bl = bl;
3222  b->bp = bp;
3223  decode_mode(ctx);
3224  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3225  (s->ss_v && h4 * 2 == (1 << b->tx)));
3226 
3227  if (!b->skip) {
3228  int has_coeffs;
3229 
3230  if (bytesperpixel == 1) {
3231  has_coeffs = decode_coeffs_8bpp(ctx);
3232  } else {
3233  has_coeffs = decode_coeffs_16bpp(ctx);
3234  }
3235  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3236  b->skip = 1;
3237  memset(&s->above_skip_ctx[col], 1, w4);
3238  memset(&s->left_skip_ctx[s->row7], 1, h4);
3239  }
3240  } else {
3241  int row7 = s->row7;
3242 
3243 #define SPLAT_ZERO_CTX(v, n) \
3244  switch (n) { \
3245  case 1: v = 0; break; \
3246  case 2: AV_ZERO16(&v); break; \
3247  case 4: AV_ZERO32(&v); break; \
3248  case 8: AV_ZERO64(&v); break; \
3249  case 16: AV_ZERO128(&v); break; \
3250  }
3251 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3252  do { \
3253  SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3254  if (s->ss_##dir2) { \
3255  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3256  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3257  } else { \
3258  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3259  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3260  } \
3261  } while (0)
3262 
3263  switch (w4) {
3264  case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3265  case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3266  case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3267  case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3268  }
3269  switch (h4) {
3270  case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3271  case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3272  case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3273  case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3274  }
3275  }
3276  if (s->pass == 1) {
3277  s->b++;
3278  s->block += w4 * h4 * 64 * bytesperpixel;
3279  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3280  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3281  s->eob += 4 * w4 * h4;
3282  s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3283  s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3284 
3285  return;
3286  }
3287  }
3288 
3289  // use temporary buffers for emulated overhangs if the stride of the target
3290  // buffer can't hold them. This makes it possible to support emu-edge and so
3291  // on even if we have large block overhangs
3292  emu[0] = (col + w4) * 8 > f->linesize[0] ||
3293  (row + h4) > s->rows;
3294  emu[1] = (col + w4) * 4 > f->linesize[1] ||
3295  (row + h4) > s->rows;
3296  if (emu[0]) {
3297  s->dst[0] = s->tmp_y;
3298  s->y_stride = 128;
3299  } else {
3300  s->dst[0] = f->data[0] + yoff;
3301  s->y_stride = f->linesize[0];
3302  }
3303  if (emu[1]) {
3304  s->dst[1] = s->tmp_uv[0];
3305  s->dst[2] = s->tmp_uv[1];
3306  s->uv_stride = 128;
3307  } else {
3308  s->dst[1] = f->data[1] + uvoff;
3309  s->dst[2] = f->data[2] + uvoff;
3310  s->uv_stride = f->linesize[1];
3311  }
3312  if (b->intra) {
3313  if (s->bpp > 8) {
3314  intra_recon_16bpp(ctx, yoff, uvoff);
3315  } else {
3316  intra_recon_8bpp(ctx, yoff, uvoff);
3317  }
3318  } else {
3319  if (s->bpp > 8) {
3320  inter_recon_16bpp(ctx);
3321  } else {
3322  inter_recon_8bpp(ctx);
3323  }
3324  }
3325  if (emu[0]) {
3326  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3327 
3328  for (n = 0; o < w; n++) {
3329  int bw = 64 >> n;
3330 
3331  av_assert2(n <= 4);
3332  if (w & bw) {
3333  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3334  s->tmp_y + o, 128, h, 0, 0);
3335  o += bw * bytesperpixel;
3336  }
3337  }
3338  }
3339  if (emu[1]) {
3340  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3341  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3342 
3343  for (n = s->ss_h; o < w; n++) {
3344  int bw = 64 >> n;
3345 
3346  av_assert2(n <= 4);
3347  if (w & bw) {
3348  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3349  s->tmp_uv[0] + o, 128, h, 0, 0);
3350  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3351  s->tmp_uv[1] + o, 128, h, 0, 0);
3352  o += bw * bytesperpixel;
3353  }
3354  }
3355  }
3356 
3357  // pick filter level and find edges to apply filter to
3358  if (s->filter.level &&
3359  (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3360  [b->mode[3] != ZEROMV]) > 0) {
3361  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3362  int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3363 
3364  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3365  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3366  if (s->ss_h || s->ss_v)
3367  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3368  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3369  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3370  b->uvtx, skip_inter);
3371 
3372  if (!s->filter.lim_lut[lvl]) {
3373  int sharp = s->filter.sharpness;
3374  int limit = lvl;
3375 
3376  if (sharp > 0) {
3377  limit >>= (sharp + 3) >> 2;
3378  limit = FFMIN(limit, 9 - sharp);
3379  }
3380  limit = FFMAX(limit, 1);
3381 
3382  s->filter.lim_lut[lvl] = limit;
3383  s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3384  }
3385  }
3386 
3387  if (s->pass == 2) {
3388  s->b++;
3389  s->block += w4 * h4 * 64 * bytesperpixel;
3390  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3391  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3392  s->eob += 4 * w4 * h4;
3393  s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3394  s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3395  }
3396 }
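/* a note on the overhang copy-back inside decode_b() above: the visible width
 * is decomposed into power-of-two segments so the existing copy kernels
 * (s->dsp.mc[n], 64 >> n pixels wide) can be reused; e.g. w = 24 becomes one
 * 16px copy (n = 2) followed by one 8px copy (n = 3), each advancing o. */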
3397 
3398 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3399  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3400 {
3401  VP9Context *s = ctx->priv_data;
3402  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3403  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3404  const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3405  s->prob.p.partition[bl][c];
3406  enum BlockPartition bp;
3407  ptrdiff_t hbs = 4 >> bl;
3408  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3409  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3410  int bytesperpixel = s->bytesperpixel;
3411 
3412  if (bl == BL_8X8) {
3413  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3414  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3415  } else if (col + hbs < s->cols) { // FIXME why not <=?
3416  if (row + hbs < s->rows) { // FIXME why not <=?
3417  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3418  switch (bp) {
3419  case PARTITION_NONE:
3420  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3421  break;
3422  case PARTITION_H:
3423  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3424  yoff += hbs * 8 * y_stride;
3425  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3426  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3427  break;
3428  case PARTITION_V:
3429  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3430  yoff += hbs * 8 * bytesperpixel;
3431  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3432  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3433  break;
3434  case PARTITION_SPLIT:
3435  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3436  decode_sb(ctx, row, col + hbs, lflvl,
3437  yoff + 8 * hbs * bytesperpixel,
3438  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3439  yoff += hbs * 8 * y_stride;
3440  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3441  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3442  decode_sb(ctx, row + hbs, col + hbs, lflvl,
3443  yoff + 8 * hbs * bytesperpixel,
3444  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3445  break;
3446  default:
3447  av_assert0(0);
3448  }
3449  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3450  bp = PARTITION_SPLIT;
3451  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3452  decode_sb(ctx, row, col + hbs, lflvl,
3453  yoff + 8 * hbs * bytesperpixel,
3454  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3455  } else {
3456  bp = PARTITION_H;
3457  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3458  }
3459  } else if (row + hbs < s->rows) { // FIXME why not <=?
3460  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3461  bp = PARTITION_SPLIT;
3462  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3463  yoff += hbs * 8 * y_stride;
3464  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3465  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3466  } else {
3467  bp = PARTITION_V;
3468  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3469  }
3470  } else {
3471  bp = PARTITION_SPLIT;
3472  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3473  }
3474  s->counts.partition[bl][c][bp]++;
3475 }
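/* the partition context c above combines one bit from the above neighbour and
 * one from the left, each extracted at this block level via >> (3 - bl), so
 * c in 0..3 selects one of four probability sets per level; the bits are the
 * per-level partition flags that SET_CTXS splatted into the
 * above/left_partition_ctx arrays when neighbouring blocks were decoded. */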
3476 
3477 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3478  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3479 {
3480  VP9Context *s = ctx->priv_data;
3481  VP9Block *b = s->b;
3482  ptrdiff_t hbs = 4 >> bl;
3483  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3484  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3485  int bytesperpixel = s->bytesperpixel;
3486 
3487  if (bl == BL_8X8) {
3488  av_assert2(b->bl == BL_8X8);
3489  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3490  } else if (s->b->bl == bl) {
3491  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3492  if (b->bp == PARTITION_H && row + hbs < s->rows) {
3493  yoff += hbs * 8 * y_stride;
3494  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3495  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3496  } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3497  yoff += hbs * 8 * bytesperpixel;
3498  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3499  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3500  }
3501  } else {
3502  decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3503  if (col + hbs < s->cols) { // FIXME why not <=?
3504  if (row + hbs < s->rows) {
3505  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3506  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3507  yoff += hbs * 8 * y_stride;
3508  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3509  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3510  decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3511  yoff + 8 * hbs * bytesperpixel,
3512  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3513  } else {
3514  yoff += hbs * 8 * bytesperpixel;
3515  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3516  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3517  }
3518  } else if (row + hbs < s->rows) {
3519  yoff += hbs * 8 * y_stride;
3520  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3521  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3522  }
3523  }
3524 }
3525 
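/*
 * The plane filters below iterate bitmasks of 8x8-block edges. The loop
 * condition "hm & ~(x - 1)" is an early-exit idiom: x - 1 has all bits
 * below x set, so ~(x - 1) masks off already-visited positions and the
 * loop stops once no edge bit at or above x remains. The same idiom in
 * isolation (filter_edge_at() is a hypothetical stand-in):
 *
 *     for (unsigned x = 1; mask & ~(x - 1); x <<= 1)
 *         if (mask & x)
 *             filter_edge_at(x);
 */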
3526 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3527  uint8_t *lvl, uint8_t (*mask)[4],
3528  uint8_t *dst, ptrdiff_t ls)
3529 {
3530  int y, x, bytesperpixel = s->bytesperpixel;
3531 
3532  // filter edges between columns (e.g. block1 | block2)
3533  for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3534  uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3535  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3536  unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3537  unsigned hm = hm1 | hm2 | hm13 | hm23;
3538 
3539  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3540  if (col || x > 1) {
3541  if (hm1 & x) {
3542  int L = *l, H = L >> 4;
3543  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3544 
3545  if (hmask1[0] & x) {
3546  if (hmask2[0] & x) {
3547  av_assert2(l[8 << ss_v] == L);
3548  s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3549  } else {
3550  s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3551  }
3552  } else if (hm2 & x) {
3553  L = l[8 << ss_v];
3554  H |= (L >> 4) << 8;
3555  E |= s->filter.mblim_lut[L] << 8;
3556  I |= s->filter.lim_lut[L] << 8;
3557  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3558  [!!(hmask2[1] & x)]
3559  [0](ptr, ls, E, I, H);
3560  } else {
3561  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3562  [0](ptr, ls, E, I, H);
3563  }
3564  } else if (hm2 & x) {
3565  int L = l[8 << ss_v], H = L >> 4;
3566  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3567 
3568  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3569  [0](ptr + 8 * ls, ls, E, I, H);
3570  }
3571  }
3572  if (ss_h) {
3573  if (x & 0xAA)
3574  l += 2;
3575  } else {
3576  if (hm13 & x) {
3577  int L = *l, H = L >> 4;
3578  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3579 
3580  if (hm23 & x) {
3581  L = l[8 << ss_v];
3582  H |= (L >> 4) << 8;
3583  E |= s->filter.mblim_lut[L] << 8;
3584  I |= s->filter.lim_lut[L] << 8;
3585  s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3586  } else {
3587  s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3588  }
3589  } else if (hm23 & x) {
3590  int L = l[8 << ss_v], H = L >> 4;
3591  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3592 
3593  s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
3594  }
3595  l++;
3596  }
3597  }
3598  }
3599 }
3600 
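/*
 * Note the dual-edge packing used above: when two adjacent 8px edges need
 * filtering with different strengths, the second edge's parameters are
 * packed into bits 8..15 (H |= (L >> 4) << 8, and likewise for E and I)
 * and a single loop_filter_mix2 call applies both at once. This is the
 * reason E, I and H are ints rather than uint8_t.
 */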
3601 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3602  uint8_t *lvl, uint8_t (*mask)[4],
3603  uint8_t *dst, ptrdiff_t ls)
3604 {
3605  int y, x, bytesperpixel = s->bytesperpixel;
3606 
3607  // block1
3608  // filter edges between rows (e.g. ------)
3609  // block2
3610  for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3611  uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3612  unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3613 
3614  for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3615  if (row || y) {
3616  if (vm & x) {
3617  int L = *l, H = L >> 4;
3618  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3619 
3620  if (vmask[0] & x) {
3621  if (vmask[0] & (x << (1 + ss_h))) {
3622  av_assert2(l[1 + ss_h] == L);
3623  s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3624  } else {
3625  s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3626  }
3627  } else if (vm & (x << (1 + ss_h))) {
3628  L = l[1 + ss_h];
3629  H |= (L >> 4) << 8;
3630  E |= s->filter.mblim_lut[L] << 8;
3631  I |= s->filter.lim_lut[L] << 8;
3632  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3633  [!!(vmask[1] & (x << (1 + ss_h)))]
3634  [1](ptr, ls, E, I, H);
3635  } else {
3636  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3637  [1](ptr, ls, E, I, H);
3638  }
3639  } else if (vm & (x << (1 + ss_h))) {
3640  int L = l[1 + ss_h], H = L >> 4;
3641  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3642 
3643  s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3644  [1](ptr + 8 * bytesperpixel, ls, E, I, H);
3645  }
3646  }
3647  if (!ss_v) {
3648  if (vm3 & x) {
3649  int L = *l, H = L >> 4;
3650  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3651 
3652  if (vm3 & (x << (1 + ss_h))) {
3653  L = l[1 + ss_h];
3654  H |= (L >> 4) << 8;
3655  E |= s->filter.mblim_lut[L] << 8;
3656  I |= s->filter.lim_lut[L] << 8;
3657  s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3658  } else {
3659  s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3660  }
3661  } else if (vm3 & (x << (1 + ss_h))) {
3662  int L = l[1 + ss_h], H = L >> 4;
3663  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3664 
3665  s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
3666  }
3667  }
3668  }
3669  if (ss_v) {
3670  if (y & 1)
3671  lvl += 16;
3672  } else {
3673  lvl += 8;
3674  }
3675  }
3676 }
3677 
3678 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3679  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3680 {
3681  VP9Context *s = ctx->priv_data;
3682  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3683  uint8_t *dst = f->data[0] + yoff;
3684  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3685  uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3686  int p;
3687 
 3688  // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
 3689  // if you think of them as acting on an 8x8 block max, we can interleave
 3690  // each v/h within the single x loop, but that only works if we work on
 3691  // 8-pixel blocks, and we won't always do that (we want at least 16px
 3692  // to use SSE2 optimizations, perhaps 32 for AVX2)
3693 
3694  filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3695  filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3696 
3697  for (p = 0; p < 2; p++) {
3698  dst = f->data[1 + p] + uvoff;
3699  filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3700  filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
3701  }
3702 }
3703 
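/*
 * Chroma addressing in this file consistently shifts byte offsets by the
 * subsampling flags: ss_h/ss_v are 1 when the respective chroma dimension
 * is halved (4:2:0 sets both), so "offset >> s->ss_h" maps a luma column
 * offset to its chroma counterpart, and "s->ss_h | s->ss_v" above selects
 * the uv mask set whenever the chroma geometry differs from luma at all.
 */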
3704 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3705 {
3706  int sb_start = ( idx * n) >> log2_n;
3707  int sb_end = ((idx + 1) * n) >> log2_n;
3708  *start = FFMIN(sb_start, n) << 3;
3709  *end = FFMIN(sb_end, n) << 3;
3710 }
3711 
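/*
 * Worked example for set_tile_offset(): with n = 11 superblock columns
 * and log2_n = 1 (two tile columns), tile 0 covers superblocks 0..4 and
 * tile 1 covers 5..10, since (1 * 11) >> 1 = 5 and (2 * 11) >> 1 = 11.
 * The final << 3 converts superblock units into the 8x8-block units that
 * the row/col loops in vp9_decode_frame() iterate over.
 */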
3712 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3713  int max_count, int update_factor)
3714 {
3715  unsigned ct = ct0 + ct1, p2, p1;
3716 
3717  if (!ct)
3718  return;
3719 
3720  p1 = *p;
3721  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3722  p2 = av_clip(p2, 1, 255);
3723  ct = FFMIN(ct, max_count);
3724  update_factor = FASTDIV(update_factor * ct, max_count);
3725 
3726  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3727  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3728 }
3729 
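/*
 * Numeric illustration of adapt_prob(): with ct0 = 3, ct1 = 1, the
 * rounded empirical probability is p2 = ((3 << 8) + 2) / 4 = 192. With
 * max_count = 20, the update factor 128 scales to 128 * 4 / 20 = 25, so
 * starting from p1 = 128 the adapted value is
 * 128 + (((192 - 128) * 25 + 128) >> 8) = 134: a damped step toward the
 * observed statistics that grows stronger as more symbols are counted.
 */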
3730 static void adapt_probs(VP9Context *s)
3731 {
3732  int i, j, k, l, m;
3733  prob_context *p = &s->prob_ctx[s->framectxid].p;
3734  int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3735 
3736  // coefficients
3737  for (i = 0; i < 4; i++)
3738  for (j = 0; j < 2; j++)
3739  for (k = 0; k < 2; k++)
3740  for (l = 0; l < 6; l++)
3741  for (m = 0; m < 6; m++) {
3742  uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3743  unsigned *e = s->counts.eob[i][j][k][l][m];
3744  unsigned *c = s->counts.coef[i][j][k][l][m];
3745 
 3746  if (l == 0 && m >= 3) // dc band only has 3 coef contexts
3747  break;
3748 
3749  adapt_prob(&pp[0], e[0], e[1], 24, uf);
3750  adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3751  adapt_prob(&pp[2], c[1], c[2], 24, uf);
3752  }
3753 
3754  if (s->keyframe || s->intraonly) {
3755  memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3756  memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3757  memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3758  memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3759  return;
3760  }
3761 
3762  // skip flag
3763  for (i = 0; i < 3; i++)
3764  adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3765 
3766  // intra/inter flag
3767  for (i = 0; i < 4; i++)
3768  adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3769 
3770  // comppred flag
3771  if (s->comppredmode == PRED_SWITCHABLE) {
3772  for (i = 0; i < 5; i++)
3773  adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3774  }
3775 
3776  // reference frames
3777  if (s->comppredmode != PRED_SINGLEREF) {
3778  for (i = 0; i < 5; i++)
3779  adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3780  s->counts.comp_ref[i][1], 20, 128);
3781  }
3782 
3783  if (s->comppredmode != PRED_COMPREF) {
3784  for (i = 0; i < 5; i++) {
3785  uint8_t *pp = p->single_ref[i];
3786  unsigned (*c)[2] = s->counts.single_ref[i];
3787 
3788  adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3789  adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3790  }
3791  }
3792 
3793  // block partitioning
3794  for (i = 0; i < 4; i++)
3795  for (j = 0; j < 4; j++) {
3796  uint8_t *pp = p->partition[i][j];
3797  unsigned *c = s->counts.partition[i][j];
3798 
3799  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3800  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3801  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3802  }
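/*
 * The three adapt_prob() calls per partition context mirror the decode-
 * time symbol tree: node 0 splits PARTITION_NONE from the rest, node 1
 * splits PARTITION_H from {V, SPLIT}, and node 2 splits PARTITION_V from
 * PARTITION_SPLIT, so each count only feeds the nodes on its own path.
 * The mv_mode, mv_joint and intra-mode updates below follow the same
 * pattern over their respective trees.
 */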
3803 
3804  // tx size
3805  if (s->txfmmode == TX_SWITCHABLE) {
3806  for (i = 0; i < 2; i++) {
3807  unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3808 
3809  adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3810  adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3811  adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3812  adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3813  adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3814  adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3815  }
3816  }
3817 
3818  // interpolation filter
3819  if (s->filtermode == FILTER_SWITCHABLE) {
3820  for (i = 0; i < 4; i++) {
3821  uint8_t *pp = p->filter[i];
3822  unsigned *c = s->counts.filter[i];
3823 
3824  adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3825  adapt_prob(&pp[1], c[1], c[2], 20, 128);
3826  }
3827  }
3828 
3829  // inter modes
3830  for (i = 0; i < 7; i++) {
3831  uint8_t *pp = p->mv_mode[i];
3832  unsigned *c = s->counts.mv_mode[i];
3833 
3834  adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3835  adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3836  adapt_prob(&pp[2], c[1], c[3], 20, 128);
3837  }
3838 
3839  // mv joints
3840  {
3841  uint8_t *pp = p->mv_joint;
3842  unsigned *c = s->counts.mv_joint;
3843 
3844  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3845  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3846  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3847  }
3848 
3849  // mv components
3850  for (i = 0; i < 2; i++) {
3851  uint8_t *pp;
3852  unsigned *c, (*c2)[2], sum;
3853 
3854  adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3855  s->counts.mv_comp[i].sign[1], 20, 128);
3856 
3857  pp = p->mv_comp[i].classes;
3858  c = s->counts.mv_comp[i].classes;
3859  sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3860  adapt_prob(&pp[0], c[0], sum, 20, 128);
3861  sum -= c[1];
3862  adapt_prob(&pp[1], c[1], sum, 20, 128);
3863  sum -= c[2] + c[3];
3864  adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3865  adapt_prob(&pp[3], c[2], c[3], 20, 128);
3866  sum -= c[4] + c[5];
3867  adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3868  adapt_prob(&pp[5], c[4], c[5], 20, 128);
3869  sum -= c[6];
3870  adapt_prob(&pp[6], c[6], sum, 20, 128);
3871  adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3872  adapt_prob(&pp[8], c[7], c[8], 20, 128);
3873  adapt_prob(&pp[9], c[9], c[10], 20, 128);
3874 
3875  adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3876  s->counts.mv_comp[i].class0[1], 20, 128);
3877  pp = p->mv_comp[i].bits;
3878  c2 = s->counts.mv_comp[i].bits;
3879  for (j = 0; j < 10; j++)
3880  adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3881 
3882  for (j = 0; j < 2; j++) {
3883  pp = p->mv_comp[i].class0_fp[j];
3884  c = s->counts.mv_comp[i].class0_fp[j];
3885  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3886  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3887  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3888  }
3889  pp = p->mv_comp[i].fp;
3890  c = s->counts.mv_comp[i].fp;
3891  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3892  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3893  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3894 
3895  if (s->highprecisionmvs) {
3896  adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3897  s->counts.mv_comp[i].class0_hp[1], 20, 128);
3898  adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3899  s->counts.mv_comp[i].hp[1], 20, 128);
3900  }
3901  }
3902 
3903  // y intra modes
3904  for (i = 0; i < 4; i++) {
3905  uint8_t *pp = p->y_mode[i];
3906  unsigned *c = s->counts.y_mode[i], sum, s2;
3907 
3908  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3909  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3910  sum -= c[TM_VP8_PRED];
3911  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3912  sum -= c[VERT_PRED];
3913  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3914  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3915  sum -= s2;
3916  adapt_prob(&pp[3], s2, sum, 20, 128);
3917  s2 -= c[HOR_PRED];
3918  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3919  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3920  sum -= c[DIAG_DOWN_LEFT_PRED];
3921  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3922  sum -= c[VERT_LEFT_PRED];
3923  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3924  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3925  }
3926 
3927  // uv intra modes
3928  for (i = 0; i < 10; i++) {
3929  uint8_t *pp = p->uv_mode[i];
3930  unsigned *c = s->counts.uv_mode[i], sum, s2;
3931 
3932  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3933  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3934  sum -= c[TM_VP8_PRED];
3935  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3936  sum -= c[VERT_PRED];
3937  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3938  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3939  sum -= s2;
3940  adapt_prob(&pp[3], s2, sum, 20, 128);
3941  s2 -= c[HOR_PRED];
3942  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3943  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3944  sum -= c[DIAG_DOWN_LEFT_PRED];
3945  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3946  sum -= c[VERT_LEFT_PRED];
3947  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3948  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3949  }
3950 }
3951 
3952 static void free_buffers(VP9Context *s)
3953 {
3954  av_freep(&s->intra_pred_data[0]);
3955  av_freep(&s->b_base);
3956  av_freep(&s->block_base);
3957 }
3958 
3959 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3960 {
3961  VP9Context *s = ctx->priv_data;
3962  int i;
3963 
3964  for (i = 0; i < 3; i++) {
3965  if (s->frames[i].tf.f->data[0])
3966  vp9_unref_frame(ctx, &s->frames[i]);
3967  av_frame_free(&s->frames[i].tf.f);
3968  }
3969  for (i = 0; i < 8; i++) {
3970  if (s->refs[i].f->data[0])
3971  ff_thread_release_buffer(ctx, &s->refs[i]);
3972  av_frame_free(&s->refs[i].f);
3973  if (s->next_refs[i].f->data[0])
3974  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3975  av_frame_free(&s->next_refs[i].f);
3976  }
3977  free_buffers(s);
3978  av_freep(&s->c_b);
3979  s->c_b_size = 0;
3980 
3981  return 0;
3982 }
3983 
3984 
3985 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3986  int *got_frame, AVPacket *pkt)
3987 {
3988  const uint8_t *data = pkt->data;
3989  int size = pkt->size;
3990  VP9Context *s = ctx->priv_data;
3991  int res, tile_row, tile_col, i, ref, row, col;
3992  int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map
3993  && s->frames[REF_FRAME_SEGMAP].segmentation_map;
3994  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3995  AVFrame *f;
3996  int bytesperpixel;
3997 
3998  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3999  return res;
4000  } else if (res == 0) {
4001  if (!s->refs[ref].f->data[0]) {
4002  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4003  return AVERROR_INVALIDDATA;
4004  }
4005  if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4006  return res;
4007  ((AVFrame *)frame)->pkt_pts = pkt->pts;
4008  ((AVFrame *)frame)->pkt_dts = pkt->dts;
4009  for (i = 0; i < 8; i++) {
4010  if (s->next_refs[i].f->data[0])
4011  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4012  if (s->refs[i].f->data[0] &&
4013  (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
4014  return res;
4015  }
4016  *got_frame = 1;
4017  return pkt->size;
4018  }
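/*
 * A return value of 0 from decode_frame_header() indicates a
 * "show existing frame" packet: the branch above re-outputs a stored
 * reference with the packet's timestamps, refreshes next_refs, and
 * returns without decoding any block data.
 */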
4019  data += res;
4020  size -= res;
4021 
4022  if (!retain_segmap_ref) {
4023  if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4024  vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4025  if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4026  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4027  return res;
4028  }
4029  if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4030  vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4031  if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4032  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4033  return res;
4034  if (s->frames[CUR_FRAME].tf.f->data[0])
4035  vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4036  if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4037  return res;
4038  f = s->frames[CUR_FRAME].tf.f;
4039  f->key_frame = s->keyframe;
4040  f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4041  ls_y = f->linesize[0];
4042  ls_uv = f->linesize[1];
4043 
4044  // ref frame setup
4045  for (i = 0; i < 8; i++) {
4046  if (s->next_refs[i].f->data[0])
4047  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4048  if (s->refreshrefmask & (1 << i)) {
4049  res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4050  } else if (s->refs[i].f->data[0]) {
4051  res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4052  }
4053  if (res < 0)
4054  return res;
4055  }
4056 
4057  // main tile decode loop
4058  bytesperpixel = s->bytesperpixel;
4059  memset(s->above_partition_ctx, 0, s->cols);
4060  memset(s->above_skip_ctx, 0, s->cols);
4061  if (s->keyframe || s->intraonly) {
4062  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4063  } else {
4064  memset(s->above_mode_ctx, NEARESTMV, s->cols);
4065  }
4066  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4067  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4068  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4069  memset(s->above_segpred_ctx, 0, s->cols);
4070  s->pass = s->frames[CUR_FRAME].uses_2pass =
4071  ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4072  if ((res = update_block_buffers(ctx)) < 0) {
4073  av_log(ctx, AV_LOG_ERROR,
4074  "Failed to allocate block buffers\n");
4075  return res;
4076  }
4077  if (s->refreshctx && s->parallelmode) {
4078  int j, k, l, m;
4079 
4080  for (i = 0; i < 4; i++) {
4081  for (j = 0; j < 2; j++)
4082  for (k = 0; k < 2; k++)
4083  for (l = 0; l < 6; l++)
4084  for (m = 0; m < 6; m++)
4085  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4086  s->prob.coef[i][j][k][l][m], 3);
4087  if (s->txfmmode == i)
4088  break;
4089  }
4090  s->prob_ctx[s->framectxid].p = s->prob.p;
4091  ff_thread_finish_setup(ctx);
4092  } else if (!s->refreshctx) {
4093  ff_thread_finish_setup(ctx);
4094  }
4095 
4096  do {
4097  yoff = uvoff = 0;
4098  s->b = s->b_base;
4099  s->block = s->block_base;
4100  s->uvblock[0] = s->uvblock_base[0];
4101  s->uvblock[1] = s->uvblock_base[1];
4102  s->eob = s->eob_base;
4103  s->uveob[0] = s->uveob_base[0];
4104  s->uveob[1] = s->uveob_base[1];
4105 
4106  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4107  set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4108  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4109  if (s->pass != 2) {
4110  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4111  int64_t tile_size;
4112 
4113  if (tile_col == s->tiling.tile_cols - 1 &&
4114  tile_row == s->tiling.tile_rows - 1) {
4115  tile_size = size;
4116  } else {
4117  tile_size = AV_RB32(data);
4118  data += 4;
4119  size -= 4;
4120  }
4121  if (tile_size > size) {
4122  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4123  return AVERROR_INVALIDDATA;
4124  }
4125  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4126  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4127  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4128  return AVERROR_INVALIDDATA;
4129  }
4130  data += tile_size;
4131  size -= tile_size;
4132  }
4133  }
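/*
 * Tile payload layout in the packet, as parsed above: every tile except
 * the last is prefixed by a 32-bit big-endian byte count (AV_RB32); the
 * last tile occupies all remaining bytes. Each tile column gets its own
 * range decoder state in s->c_b[], which is what allows the column loop
 * below to switch contexts per tile.
 */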
4134 
4135  for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4136  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4137  struct VP9Filter *lflvl_ptr = s->lflvl;
4138  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4139 
4140  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4141  set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4142  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
4143 
4144  if (s->pass != 2) {
4145  memset(s->left_partition_ctx, 0, 8);
4146  memset(s->left_skip_ctx, 0, 8);
4147  if (s->keyframe || s->intraonly) {
4148  memset(s->left_mode_ctx, DC_PRED, 16);
4149  } else {
4150  memset(s->left_mode_ctx, NEARESTMV, 8);
4151  }
4152  memset(s->left_y_nnz_ctx, 0, 16);
4153  memset(s->left_uv_nnz_ctx, 0, 32);
4154  memset(s->left_segpred_ctx, 0, 8);
4155 
4156  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4157  }
4158 
4159  for (col = s->tiling.tile_col_start;
4160  col < s->tiling.tile_col_end;
4161  col += 8, yoff2 += 64 * bytesperpixel,
4162  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
 4163  // FIXME integrate with lf code (i.e. zero after each
 4164  // use, as is done for the invtxfm coefficients)
4165  if (s->pass != 1) {
4166  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4167  }
4168 
4169  if (s->pass == 2) {
4170  decode_sb_mem(ctx, row, col, lflvl_ptr,
4171  yoff2, uvoff2, BL_64X64);
4172  } else {
4173  decode_sb(ctx, row, col, lflvl_ptr,
4174  yoff2, uvoff2, BL_64X64);
4175  }
4176  }
4177  if (s->pass != 2) {
4178  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4179  }
4180  }
4181 
4182  if (s->pass == 1) {
4183  continue;
4184  }
4185 
4186  // backup pre-loopfilter reconstruction data for intra
4187  // prediction of next row of sb64s
4188  if (row + 8 < s->rows) {
4189  memcpy(s->intra_pred_data[0],
4190  f->data[0] + yoff + 63 * ls_y,
4191  8 * s->cols * bytesperpixel);
4192  memcpy(s->intra_pred_data[1],
4193  f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4194  8 * s->cols * bytesperpixel >> s->ss_h);
4195  memcpy(s->intra_pred_data[2],
4196  f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4197  8 * s->cols * bytesperpixel >> s->ss_h);
4198  }
4199 
4200  // loopfilter one row
4201  if (s->filter.level) {
4202  yoff2 = yoff;
4203  uvoff2 = uvoff;
4204  lflvl_ptr = s->lflvl;
4205  for (col = 0; col < s->cols;
4206  col += 8, yoff2 += 64 * bytesperpixel,
4207  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4208  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4209  }
4210  }
4211 
 4212  // FIXME maybe we can make this more fine-grained by running the
 4213  // loopfilter per-block instead of after each sbrow
 4214  // In fact that would also make intra pred left preparation easier?
4215  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4216  }
4217  }
4218 
4219  if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4220  adapt_probs(s);
4221  ff_thread_finish_setup(ctx);
4222  }
4223  } while (s->pass++ == 1);
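/*
 * Pass bookkeeping: s->pass starts at 0 (single pass) or 1 (two-pass
 * frame threading), so "while (s->pass++ == 1)" runs the body once in
 * the single-pass case and twice otherwise; in the latter, pass 1 only
 * parses (note the "continue" before the loopfilter) and pass 2
 * reconstructs through decode_sb_mem().
 */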
4224  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4225 
4226  // ref frame setup
4227  for (i = 0; i < 8; i++) {
4228  if (s->refs[i].f->data[0])
4229  ff_thread_release_buffer(ctx, &s->refs[i]);
4230  ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4231  }
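/*
 * next_refs[] was populated per the refresh mask while decoding; moving
 * it into refs[] only now, after the whole frame is done, keeps the
 * visible reference set consistent for any frame thread still reading
 * the old entries.
 */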
4232 
4233  if (!s->invisible) {
4234  if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4235  return res;
4236  *got_frame = 1;
4237  }
4238 
4239  return pkt->size;
4240 }
4241 
4242 static void vp9_decode_flush(AVCodecContext *ctx)
4243 {
4244  VP9Context *s = ctx->priv_data;
4245  int i;
4246 
4247  for (i = 0; i < 3; i++)
4248  vp9_unref_frame(ctx, &s->frames[i]);
4249  for (i = 0; i < 8; i++)
4250  ff_thread_release_buffer(ctx, &s->refs[i]);
4251 }
4252 
4253 static int init_frames(AVCodecContext *ctx)
4254 {
4255  VP9Context *s = ctx->priv_data;
4256  int i;
4257 
4258  for (i = 0; i < 3; i++) {
4259  s->frames[i].tf.f = av_frame_alloc();
4260  if (!s->frames[i].tf.f) {
4261  vp9_decode_free(ctx);
4262  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4263  return AVERROR(ENOMEM);
4264  }
4265  }
4266  for (i = 0; i < 8; i++) {
4267  s->refs[i].f = av_frame_alloc();
4268  s->next_refs[i].f = av_frame_alloc();
4269  if (!s->refs[i].f || !s->next_refs[i].f) {
4270  vp9_decode_free(ctx);
4271  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4272  return AVERROR(ENOMEM);
4273  }
4274  }
4275 
4276  return 0;
4277 }
4278 
4279 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4280 {
4281  VP9Context *s = ctx->priv_data;
4282 
4283  ctx->internal->allocate_progress = 1;
4284  s->last_bpp = 0;
4285  s->filter.sharpness = -1;
4286 
4287  return init_frames(ctx);
4288 }
4289 
4290 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4291 {
4292  return init_frames(avctx);
4293 }
4294 
4295 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4296 {
4297  int i, res;
4298  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4299 
4300  // detect size changes in other threads
4301  if (s->intra_pred_data[0] &&
4302  (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4303  free_buffers(s);
4304  }
4305 
4306  for (i = 0; i < 3; i++) {
4307  if (s->frames[i].tf.f->data[0])
4308  vp9_unref_frame(dst, &s->frames[i]);
4309  if (ssrc->frames[i].tf.f->data[0]) {
4310  if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4311  return res;
4312  }
4313  }
4314  for (i = 0; i < 8; i++) {
4315  if (s->refs[i].f->data[0])
4316  ff_thread_release_buffer(dst, &s->refs[i]);
4317  if (ssrc->next_refs[i].f->data[0]) {
4318  if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4319  return res;
4320  }
4321  }
4322 
4323  s->invisible = ssrc->invisible;
4324  s->keyframe = ssrc->keyframe;
4325  s->ss_v = ssrc->ss_v;
4326  s->ss_h = ssrc->ss_h;
4327  s->segmentation.enabled = ssrc->segmentation.enabled;
4328  s->segmentation.update_map = ssrc->segmentation.update_map;
4329  s->bytesperpixel = ssrc->bytesperpixel;
4330  s->bpp = ssrc->bpp;
4331  s->bpp_index = ssrc->bpp_index;
4332  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4333  memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4334  if (ssrc->segmentation.enabled) {
4335  memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4336  sizeof(s->segmentation.feat));
4337  }
4338 
4339  return 0;
4340 }
4341 
4342 static const AVProfile profiles[] = {
4343  { FF_PROFILE_VP9_0, "Profile 0" },
4344  { FF_PROFILE_VP9_1, "Profile 1" },
4345  { FF_PROFILE_VP9_2, "Profile 2" },
4346  { FF_PROFILE_VP9_3, "Profile 3" },
4347  { FF_PROFILE_UNKNOWN },
4348 };
4349 
4350 AVCodec ff_vp9_decoder = {
4351  .name = "vp9",
4352  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4353  .type = AVMEDIA_TYPE_VIDEO,
4354  .id = AV_CODEC_ID_VP9,
4355  .priv_data_size = sizeof(VP9Context),
4356  .init = vp9_decode_init,
4357  .close = vp9_decode_free,
4358  .decode = vp9_decode_frame,
4359  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4360  .flush = vp9_decode_flush,
4361  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4362  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4363  .profiles = NULL_IF_CONFIG_SMALL(profiles),
4364 };
Definition: vp9.c:149