FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vp9.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "avcodec.h"
25 #include "get_bits.h"
26 #include "internal.h"
27 #include "thread.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dsp.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
35 
36 #define VP9_SYNCCODE 0x498342
37 
42 };
43 
// NOTE(review): enumerator list elided in this listing (internal lines 45-48 missing).
44 enum BlockLevel {
49 };
50 
// NOTE(review): enumerator list elided in this listing (internal lines 52-65 missing);
// the bwh_tab[] table below is indexed by these values (N_BS_SIZES entries).
51 enum BlockSize {
66 };
67 
// Per-8x8-block motion info stored with each frame so the next frame can
// use co-located MVs for prediction: two motion vectors plus the two
// reference-frame indices they point at (presumably one per compound
// prediction direction; semantics of negative ref values not visible here).
68 struct VP9mvrefPair {
69  VP56mv mv[2];
70  int8_t ref[2];
71 };
72 
// A decoded frame together with its side data; `mv` points into the
// frame's extradata buffer (see vp9_alloc_frame below).
// NOTE(review): several members elided in this listing (internal lines 74-76, 78).
73 typedef struct VP9Frame {
77  struct VP9mvrefPair *mv;
79 } VP9Frame;
80 
// Per-superblock loop-filter state: one filter level per 8x8 sub-block,
// and edge masks split by plane (y/uv), orientation (col/row), row and
// transform-size category, as annotated inline below.
81 struct VP9Filter {
82  uint8_t level[8 * 8];
83  uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84  [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
85 };
86 
// Per-block decode state: up to four sub-block MVs (two refs each), the
// block size, luma/chroma transform sizes and the partition level.
// NOTE(review): several members elided in this listing (internal lines 88-89, 94).
87 typedef struct VP9Block {
90  VP56mv mv[4 /* b_idx */][2 /* ref */];
91  enum BlockSize bs;
92  enum TxfmMode tx, uvtx;
93  enum BlockLevel bl;
95 } VP9Block;
96 
// Main decoder context. Holds the parsed bitstream header state
// (filter/lf_delta/segmentation/tiling), probability contexts and
// adaptation counters, and per-frame scratch buffers.
// NOTE(review): many members are elided in this listing (gaps in the
// internal numbering); the comments below describe only what is visible.
97 typedef struct VP9Context {
103  unsigned c_b_size;
105  int pass;
106  int row, row7, col, col7;
108  ptrdiff_t y_stride, uv_stride;
109 
110  // bitstream header
// Indices into the s->frames[] array (array itself elided in this listing).
131 #define CUR_FRAME 0
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
135 
136  struct {
138  int8_t sharpness;
141  } filter;
// Loop-filter deltas applied per reference frame and per inter mode.
142  struct {
144  int8_t mode[2];
145  int8_t ref[4];
146  } lf_delta;
150 #define MAX_SEGMENT 8
151  struct {
157  struct {
163  int16_t q_val;
164  int8_t lf_val;
// Derived dequant multipliers: [y/uv][dc/ac] (filled in decode_frame_header).
165  int16_t qmul[2][2];
// Derived loop-filter levels: [ref][mode] (filled in decode_frame_header).
166  uint8_t lflvl[4][2];
167  } feat[MAX_SEGMENT];
168  } segmentation;
169  struct {
171  unsigned tile_cols, tile_rows;
173  } tiling;
174  unsigned sb_cols, sb_rows, rows, cols;
// Saved probability contexts (one per frame context id, 4 total).
175  struct {
177  uint8_t coef[4][2][2][6][6][3];
178  } prob_ctx[4];
// Probabilities in active use for the current frame.
179  struct {
180  prob_context p;
181  uint8_t coef[4][2][2][6][6][11];
184  } prob;
// Symbol counts gathered during decoding, used for backward adaptation.
185  struct {
186  unsigned y_mode[4][10];
187  unsigned uv_mode[10][10];
188  unsigned filter[4][3];
189  unsigned mv_mode[7][4];
190  unsigned intra[4][2];
191  unsigned comp[5][2];
192  unsigned single_ref[5][2][2];
193  unsigned comp_ref[5][2];
194  unsigned tx32p[2][4];
195  unsigned tx16p[2][3];
196  unsigned tx8p[2][2];
197  unsigned skip[3][2];
198  unsigned mv_joint[4];
199  struct {
200  unsigned sign[2];
201  unsigned classes[11];
202  unsigned class0[2];
203  unsigned bits[10][2];
204  unsigned class0_fp[2][4];
205  unsigned fp[4];
206  unsigned class0_hp[2];
207  unsigned hp[2];
208  } mv_comp[2];
209  unsigned partition[4][4][4];
210  unsigned coef[4][2][2][6][6][3];
211  unsigned eob[4][2][2][6][6][2];
212  } counts;
215 
216  // contextual (left/above) cache
231  // FIXME maybe merge some of the below in a flags field?
242 
243  // whole-frame cache
245  struct VP9Filter *lflvl;
247 
248  // block reconstruction intermediates
250  int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
252  struct { int x, y; } min_mv, max_mv;
253  DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254  DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
// MV scaling factors per reference frame, in 1/16384 units (see the
// `(refw << 14) / w` computation in decode_frame_header).
255  uint16_t mvscale[3][2];
257 } VP9Context;
258 
// Block width/height lookup per block size:
//   bwh_tab[0][bs] = { w, h } in units of 4 pixels (e.g. 64x64 -> {16,16}),
//   bwh_tab[1][bs] = { w, h } in units of 8 pixels, clamped to a minimum of 1.
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260  {
261  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263  }, {
264  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
266  }
267 };
268 
// Allocates a frame (presumably vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f);
// the signature line is elided in this listing): gets a thread-safe frame
// buffer, then one extradata buffer holding both the segmentation map
// (sz bytes, one per 8x8 block of the sb-aligned frame) and the mv array.
// Returns 0 on success or a negative AVERROR code.
270 {
271  VP9Context *s = ctx->priv_data;
272  int ret, sz;
273 
274  if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
275  return ret;
// 64 = 8x8 blocks per 64x64 superblock; one byte + one VP9mvrefPair per block.
276  sz = 64 * s->sb_cols * s->sb_rows;
277  if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// On allocation failure, release the already-acquired frame buffer.
278  ff_thread_release_buffer(ctx, &f->tf);
279  return AVERROR(ENOMEM);
280  }
281 
// NOTE(review): internal line 282 elided here — presumably the assignment of
// f->segmentation_map to the start of the extradata buffer; verify upstream.
283  f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
284 
285  return 0;
286 }
287 
// Releases a frame's buffers (signature line elided in this listing):
// returns the frame buffer to the thread pool and clears the side-data
// pointers so the frame can be reused.
// NOTE(review): internal line 291 elided — presumably av_buffer_unref(&f->extradata).
289 {
290  ff_thread_release_buffer(ctx, &f->tf);
292  f->segmentation_map = NULL;
293 }
294 
// Makes `dst` a new reference to `src` (signature line elided in this
// listing): refs the underlying frame buffer and the shared extradata
// buffer, then copies the derived pointers/flags. On extradata ref
// failure the partially-referenced dst is unreferenced again.
// Returns 0 on success or a negative AVERROR code.
296 {
297  int res;
298 
299  if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300  return res;
301  } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
302  vp9_unref_frame(ctx, dst);
303  return AVERROR(ENOMEM);
304  }
305 
// NOTE(review): internal line 306 elided — presumably dst->segmentation_map = src->segmentation_map.
307  dst->mv = src->mv;
308  dst->uses_2pass = src->uses_2pass;
309 
310  return 0;
311 }
312 
// (Re)configures the decoder for a new frame size / pixel format.
// No-op if the dimensions and pix_fmt are unchanged and the per-column
// caches already exist. Otherwise recomputes superblock/row/col counts
// and reallocates all above-row context arrays out of one flat buffer,
// and reinitializes the DSP contexts if the bit depth changed.
// Returns 0 on success or AVERROR(ENOMEM).
313 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 {
315  VP9Context *s = ctx->priv_data;
316  uint8_t *p;
317  int bytesperpixel = s->bytesperpixel;
318 
319  av_assert0(w > 0 && h > 0);
320 
321  if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
322  return 0;
323 
324  ctx->width = w;
325  ctx->height = h;
326  ctx->pix_fmt = fmt;
// Superblocks are 64x64, block units are 8x8 — round both up.
327  s->sb_cols = (w + 63) >> 6;
328  s->sb_rows = (h + 63) >> 6;
329  s->cols = (w + 7) >> 3;
330  s->rows = (h + 7) >> 3;
331 
// Carve successive arrays out of the single allocation below; `n` is the
// per-superblock-column element count.
332 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
333  av_freep(&s->intra_pred_data[0]);
334  // FIXME we slightly over-allocate here for subsampled chroma, but a little
335  // bit of padding shouldn't affect performance...
336  p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
337  sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338  if (!p)
339  return AVERROR(ENOMEM);
340  assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
341  assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
342  assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
343  assign(s->above_y_nnz_ctx, uint8_t *, 16);
344  assign(s->above_mode_ctx, uint8_t *, 16);
345  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
346  assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
347  assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
// NOTE(review): internal line 348 elided here — one assign() call missing
// from this listing (presumably above_partition_ctx); verify upstream.
349  assign(s->above_skip_ctx, uint8_t *, 8);
350  assign(s->above_txfm_ctx, uint8_t *, 8);
351  assign(s->above_segpred_ctx, uint8_t *, 8);
352  assign(s->above_intra_ctx, uint8_t *, 8);
353  assign(s->above_comp_ctx, uint8_t *, 8);
354  assign(s->above_ref_ctx, uint8_t *, 8);
355  assign(s->above_filter_ctx, uint8_t *, 8);
356  assign(s->lflvl, struct VP9Filter *, 1);
357 #undef assign
358 
359  // these will be re-allocated a little later
360  av_freep(&s->b_base);
361  av_freep(&s->block_base);
362 
// Bit depth changed (e.g. 8 -> 10): the DSP function tables depend on it.
363  if (s->bpp != s->last_bpp) {
364  ff_vp9dsp_init(&s->dsp, s->bpp);
365  ff_videodsp_init(&s->vdsp, s->bpp);
366  s->last_bpp = s->bpp;
367  }
368 
369  return 0;
370 }
371 
// (Re)allocates the per-block coefficient/eob scratch buffers (signature
// line elided in this listing). In 2-pass (frame-threaded) mode the
// buffers must cover every superblock of the frame; otherwise a single
// superblock's worth suffices. Returns 0 on success or AVERROR(ENOMEM).
// NOTE(review): internal line 377 elided — the guard condition in front of
// the early `return 0;` below is missing from this listing; verify upstream.
373 {
374  VP9Context *s = ctx->priv_data;
375  int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
376 
378  return 0;
379 
380  av_free(s->b_base);
381  av_free(s->block_base);
// Chroma sizes shrink with each active subsampling direction.
382  chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
383  chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
384  if (s->frames[CUR_FRAME].uses_2pass) {
385  int sbs = s->sb_cols * s->sb_rows;
386 
387  s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
// One allocation for luma coeffs + 2 chroma planes + luma/chroma eob arrays.
388  s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
389  16 * 16 + 2 * chroma_eobs) * sbs);
390  if (!s->b_base || !s->block_base)
391  return AVERROR(ENOMEM);
392  s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
393  s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
394  s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
395  s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
396  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
397  } else {
// Single-pass: same layout, but only one superblock's worth of storage.
398  s->b_base = av_malloc(sizeof(VP9Block));
399  s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
400  16 * 16 + 2 * chroma_eobs);
401  if (!s->b_base || !s->block_base)
402  return AVERROR(ENOMEM);
403  s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
404  s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
405  s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
406  s->uveob_base[0] = s->eob_base + 16 * 16;
407  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
408  }
409 
411  return 0;
412 }
413 
414 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Reads an n-bit magnitude followed by a sign bit and returns the signed
// value (signature line elided in this listing; presumably
// get_sbits_inv(GetBitContext *gb, int n)).
416 {
417  int v = get_bits(gb, n);
418  return get_bits1(gb) ? -v : v;
419 }
420 
// Inverse of the "recenter nonneg" mapping used by the differential
// probability update (signature line elided in this listing; presumably
// inv_recenter_nonneg(int v, int m)): small v values alternate around m
// (odd -> below, even -> above); v > 2*m falls outside that window and
// maps to itself.
422 {
423  return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
424 }
425 
426 // differential forward probability updates
426 // differential forward probability updates
// Decodes a forward-coded probability update for current probability p
// (range [1,255]) and returns the new probability. The delta is coded as
// a 4-tier variable-length code (4/4/5/7-bit tiers, the last with an
// extension bit), then mapped through inv_map_table[] and re-centered
// around p via inv_recenter_nonneg(), mirrored for p > 128.
427 static int update_prob(VP56RangeCoder *c, int p)
428 {
429  static const int inv_map_table[255] = {
430  7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
431  189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
432  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
433  25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
434  40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
435  55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
436  70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
437  86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
438  101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
439  116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
440  131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
441  146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
442  161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
443  177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
444  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
445  207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
446  222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
447  237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
448  252, 253, 253,
449  };
450  int d;
451 
452  /* This code is trying to do a differential probability update. For a
453  * current probability A in the range [1, 255], the difference to a new
454  * probability of any value can be expressed differentially as 1-A,255-A
455  * where some part of this (absolute range) exists both in positive as
456  * well as the negative part, whereas another part only exists in one
457  * half. We're trying to code this shared part differentially, i.e.
458  * times two where the value of the lowest bit specifies the sign, and
459  * the single part is then coded on top of this. This absolute difference
460  * then again has a value of [0,254], but a bigger value in this range
461  * indicates that we're further away from the original value A, so we
462  * can code this as a VLC code, since higher values are increasingly
463  * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
464  * updates vs. the 'fine, exact' updates further down the range, which
465  * adds one extra dimension to this differential update model. */
466 
// Tier selection: each failed rac bit moves to the next (larger) tier.
467  if (!vp8_rac_get(c)) {
468  d = vp8_rac_get_uint(c, 4) + 0;
469  } else if (!vp8_rac_get(c)) {
470  d = vp8_rac_get_uint(c, 4) + 16;
471  } else if (!vp8_rac_get(c)) {
472  d = vp8_rac_get_uint(c, 5) + 32;
473  } else {
474  d = vp8_rac_get_uint(c, 7);
// Values >= 65 get one extra resolution bit, doubling the range.
475  if (d >= 65)
476  d = (d << 1) - 65 + vp8_rac_get(c);
477  d += 64;
478  av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
479  }
480 
// Mirror around 128 so the update is symmetric in p and 256-p.
481  return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
482  255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
483 }
484 
// Parses the bit-depth / colorspace / subsampling fields of the frame
// header (signature line elided in this listing; presumably
// read_colorspace_details(AVCodecContext *ctx)). Fills in s->bpp,
// s->bpp_index, s->bytesperpixel, s->ss_h/ss_v and ctx->colorspace, and
// returns the matching AVPixelFormat, or a negative AVERROR code.
// NOTE(review): the initializer lists of the static lookup tables below
// (colorspaces[], pix_fmt_rgb[], pix_fmt_for_ss[]) are elided in this
// listing; verify their contents upstream.
486 {
487  static const enum AVColorSpace colorspaces[8] = {
490  };
491  VP9Context *s = ctx->priv_data;
492  enum AVPixelFormat res;
// Profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bits here.
493  int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
494 
495  s->bpp_index = bits;
496  s->bpp = 8 + bits * 2;
497  s->bytesperpixel = (7 + s->bpp) >> 3;
498  ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
499  if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
500  static const enum AVPixelFormat pix_fmt_rgb[3] = {
502  };
// RGB is only valid in the odd profiles (1/3); it is never subsampled.
503  if (ctx->profile & 1) {
504  s->ss_h = s->ss_v = 0;
505  res = pix_fmt_rgb[bits];
507  if (get_bits1(&s->gb)) {
508  av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
509  return AVERROR_INVALIDDATA;
510  }
511  } else {
512  av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
513  ctx->profile);
514  return AVERROR_INVALIDDATA;
515  }
516  } else {
517  static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
524  };
// Odd profiles carry explicit subsampling bits; even profiles are 4:2:0.
526  if (ctx->profile & 1) {
527  s->ss_h = get_bits1(&s->gb);
528  s->ss_v = get_bits1(&s->gb);
529  if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
530  av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
531  ctx->profile);
532  return AVERROR_INVALIDDATA;
533  } else if (get_bits1(&s->gb)) {
534  av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
535  ctx->profile);
536  return AVERROR_INVALIDDATA;
537  }
538  } else {
539  s->ss_h = s->ss_v = 1;
540  res = pix_fmt_for_ss[bits][1][1];
541  }
542  }
543 
544  return res;
545 }
546 
// Parses the complete VP9 frame header (presumably
// decode_frame_header(AVCodecContext *ctx, const uint8_t *data, int size, int *ref);
// the first line of the signature, internal line 547, is elided in this
// listing). Reads the uncompressed header (frame type, size, references,
// loop filter, quantizers, segmentation, tiling), then the arith-coded
// compressed header (probability updates). For "show existing frame"
// headers it only sets *ref and returns 0; otherwise it returns the total
// header size consumed, or a negative AVERROR code on invalid data.
// NOTE(review): several internal lines are elided throughout this listing
// (flagged inline below); verify those spots upstream before relying on them.
548  const uint8_t *data, int size, int *ref)
549 {
550  VP9Context *s = ctx->priv_data;
551  int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
552  enum AVPixelFormat fmt = ctx->pix_fmt;
553  int last_invisible;
554  const uint8_t *data2;
555 
556  /* general header */
557  if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
558  av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
559  return res;
560  }
561  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
562  av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
563  return AVERROR_INVALIDDATA;
564  }
// Profile is 2 bits (low bit first); profile 3 carries one reserved bit.
565  ctx->profile = get_bits1(&s->gb);
566  ctx->profile |= get_bits1(&s->gb) << 1;
567  if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
568  if (ctx->profile > 3) {
569  av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
570  return AVERROR_INVALIDDATA;
571  }
// "show existing frame": directly display reference *ref, no frame data.
572  if (get_bits1(&s->gb)) {
573  *ref = get_bits(&s->gb, 3);
574  return 0;
575  }
576  s->last_keyframe = s->keyframe;
577  s->keyframe = !get_bits1(&s->gb);
578  last_invisible = s->invisible;
579  s->invisible = !get_bits1(&s->gb);
580  s->errorres = get_bits1(&s->gb);
581  s->use_last_frame_mvs = !s->errorres && !last_invisible;
582  if (s->keyframe) {
583  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
584  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
585  return AVERROR_INVALIDDATA;
586  }
587  if ((fmt = read_colorspace_details(ctx)) < 0)
588  return fmt;
589  // for profile 1, here follows the subsampling bits
590  s->refreshrefmask = 0xff;
591  w = get_bits(&s->gb, 16) + 1;
592  h = get_bits(&s->gb, 16) + 1;
593  if (get_bits1(&s->gb)) // display size
594  skip_bits(&s->gb, 32);
595  } else {
596  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
597  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
598  if (s->intraonly) {
599  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
600  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
601  return AVERROR_INVALIDDATA;
602  }
603  if (ctx->profile >= 1) {
604  if ((fmt = read_colorspace_details(ctx)) < 0)
605  return fmt;
606  } else {
// Profile 0 intra-only frames are implicitly 8-bit 4:2:0.
607  s->ss_h = s->ss_v = 1;
608  s->bpp = 8;
609  s->bpp_index = 0;
610  s->bytesperpixel = 1;
611  fmt = AV_PIX_FMT_YUV420P;
// NOTE(review): internal lines 612-613 elided here; verify upstream.
614  }
615  s->refreshrefmask = get_bits(&s->gb, 8);
616  w = get_bits(&s->gb, 16) + 1;
617  h = get_bits(&s->gb, 16) + 1;
618  if (get_bits1(&s->gb)) // display size
619  skip_bits(&s->gb, 32);
620  } else {
621  s->refreshrefmask = get_bits(&s->gb, 8);
622  s->refidx[0] = get_bits(&s->gb, 3);
623  s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
624  s->refidx[1] = get_bits(&s->gb, 3);
625  s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
626  s->refidx[2] = get_bits(&s->gb, 3);
627  s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
628  if (!s->refs[s->refidx[0]].f->data[0] ||
629  !s->refs[s->refidx[1]].f->data[0] ||
630  !s->refs[s->refidx[2]].f->data[0]) {
631  av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
632  return AVERROR_INVALIDDATA;
633  }
// Frame size: either inherited from one of the three refs, or explicit.
634  if (get_bits1(&s->gb)) {
635  w = s->refs[s->refidx[0]].f->width;
636  h = s->refs[s->refidx[0]].f->height;
637  } else if (get_bits1(&s->gb)) {
638  w = s->refs[s->refidx[1]].f->width;
639  h = s->refs[s->refidx[1]].f->height;
640  } else if (get_bits1(&s->gb)) {
641  w = s->refs[s->refidx[2]].f->width;
642  h = s->refs[s->refidx[2]].f->height;
643  } else {
644  w = get_bits(&s->gb, 16) + 1;
645  h = get_bits(&s->gb, 16) + 1;
646  }
647  // Note that in this code, "CUR_FRAME" is actually before we
648  // have formally allocated a frame, and thus actually represents
649  // the _last_ frame
650  s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
651  s->frames[CUR_FRAME].tf.f->height == h;
652  if (get_bits1(&s->gb)) // display size
653  skip_bits(&s->gb, 32);
654  s->highprecisionmvs = get_bits1(&s->gb);
// NOTE(review): internal line 655 elided here — the assignment consuming
// the filter-mode bit(s) before this 2-bit read; verify upstream.
656  get_bits(&s->gb, 2);
// Compound prediction needs refs with differing sign bias.
657  s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
658  s->signbias[0] != s->signbias[2]);
659  if (s->allowcompinter) {
// The ref whose bias matches no other becomes the fixed compound ref.
660  if (s->signbias[0] == s->signbias[1]) {
661  s->fixcompref = 2;
662  s->varcompref[0] = 0;
663  s->varcompref[1] = 1;
664  } else if (s->signbias[0] == s->signbias[2]) {
665  s->fixcompref = 1;
666  s->varcompref[0] = 0;
667  s->varcompref[1] = 2;
668  } else {
669  s->fixcompref = 0;
670  s->varcompref[0] = 1;
671  s->varcompref[1] = 2;
672  }
673  }
674 
// Validate each ref's format/size against this frame and derive MV scaling.
675  for (i = 0; i < 3; i++) {
676  AVFrame *ref = s->refs[s->refidx[i]].f;
677  int refw = ref->width, refh = ref->height;
678 
679  if (ref->format != fmt) {
680  av_log(ctx, AV_LOG_ERROR,
681  "Ref pixfmt (%s) did not match current frame (%s)",
// NOTE(review): internal line 682 elided here — presumably the first
// av_get_pix_fmt_name(...) argument of this av_log call.
683  av_get_pix_fmt_name(fmt));
684  return AVERROR_INVALIDDATA;
685  } else if (refw == w && refh == h) {
686  s->mvscale[i][0] = s->mvscale[i][1] = 0;
687  } else {
// Scaling limits: refs may be at most 2x larger or 16x smaller.
688  if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
689  av_log(ctx, AV_LOG_ERROR,
690  "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
691  refw, refh, w, h);
692  return AVERROR_INVALIDDATA;
693  }
694  s->mvscale[i][0] = (refw << 14) / w;
695  s->mvscale[i][1] = (refh << 14) / h;
696  s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
697  s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
698  }
699  }
700  }
701  }
702  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
703  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
704  s->framectxid = c = get_bits(&s->gb, 2);
705 
706  /* loopfilter header data */
707  if (s->keyframe || s->errorres || s->intraonly) {
708  // reset loopfilter defaults
709  s->lf_delta.ref[0] = 1;
710  s->lf_delta.ref[1] = 0;
711  s->lf_delta.ref[2] = -1;
712  s->lf_delta.ref[3] = -1;
713  s->lf_delta.mode[0] = 0;
714  s->lf_delta.mode[1] = 0;
715  memset(s->segmentation.feat, 0, sizeof(s->segmentation.feat));
716  }
717  s->filter.level = get_bits(&s->gb, 6);
718  sharp = get_bits(&s->gb, 3);
719  // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
720  // the old cache values since they are still valid
721  if (s->filter.sharpness != sharp)
722  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
723  s->filter.sharpness = sharp;
724  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
725  if (get_bits1(&s->gb)) {
726  for (i = 0; i < 4; i++)
727  if (get_bits1(&s->gb))
728  s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
729  for (i = 0; i < 2; i++)
730  if (get_bits1(&s->gb))
731  s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
732  }
733  }
734 
735  /* quantization header data */
736  s->yac_qi = get_bits(&s->gb, 8);
737  s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
738  s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
739  s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
740  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
741  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
// NOTE(review): internal line 743 (the body of this if) elided; verify upstream.
742  if (s->lossless)
744 
745  /* segmentation header info */
// NOTE(review): internal line 746 elided here; verify upstream.
747  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
748  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
749  for (i = 0; i < 7; i++)
750  s->prob.seg[i] = get_bits1(&s->gb) ?
751  get_bits(&s->gb, 8) : 255;
752  if ((s->segmentation.temporal = get_bits1(&s->gb))) {
753  for (i = 0; i < 3; i++)
754  s->prob.segpred[i] = get_bits1(&s->gb) ?
755  get_bits(&s->gb, 8) : 255;
756  }
757  }
758  if ((!s->segmentation.update_map || s->segmentation.temporal) &&
759  (w != s->frames[CUR_FRAME].tf.f->width ||
760  h != s->frames[CUR_FRAME].tf.f->height)) {
761  av_log(ctx, AV_LOG_WARNING,
762  "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
// NOTE(review): internal lines 763-764 (remaining av_log arguments) elided.
765  //return AVERROR_INVALIDDATA;
766  }
767 
768  if (get_bits1(&s->gb)) {
// NOTE(review): internal line 769 elided here; verify upstream.
770  for (i = 0; i < 8; i++) {
771  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
772  s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
773  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
774  s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
775  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
776  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
777  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
778  }
779  }
780  }
781 
782  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
783  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
784  int qyac, qydc, quvac, quvdc, lflvl, sh;
785 
786  if (s->segmentation.enabled && s->segmentation.feat[i].q_enabled) {
// NOTE(review): internal line 787 elided — the condition choosing between
// absolute and delta segment quantizer values; verify upstream.
788  qyac = av_clip_uintp2(s->segmentation.feat[i].q_val, 8);
789  else
790  qyac = av_clip_uintp2(s->yac_qi + s->segmentation.feat[i].q_val, 8);
791  } else {
792  qyac = s->yac_qi;
793  }
794  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
795  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
796  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
797  qyac = av_clip_uintp2(qyac, 8);
798 
799  s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
800  s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
801  s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
802  s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
803 
804  sh = s->filter.level >= 32;
805  if (s->segmentation.enabled && s->segmentation.feat[i].lf_enabled) {
// NOTE(review): internal line 806 elided — the absolute-vs-delta condition
// for the segment loop-filter value; verify upstream.
807  lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
808  else
809  lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
810  } else {
811  lflvl = s->filter.level;
812  }
813  if (s->lf_delta.enabled) {
814  s->segmentation.feat[i].lflvl[0][0] =
815  s->segmentation.feat[i].lflvl[0][1] =
816  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
817  for (j = 1; j < 4; j++) {
818  s->segmentation.feat[i].lflvl[j][0] =
819  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820  s->lf_delta.mode[0]) * (1 << sh)), 6);
821  s->segmentation.feat[i].lflvl[j][1] =
822  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
823  s->lf_delta.mode[1]) * (1 << sh)), 6);
824  }
825  } else {
826  memset(s->segmentation.feat[i].lflvl, lflvl,
827  sizeof(s->segmentation.feat[i].lflvl));
828  }
829  }
830 
831  /* tiling info */
832  if ((res = update_size(ctx, w, h, fmt)) < 0) {
833  av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
834  return res;
835  }
// Tile columns: min count keeps each tile <= 64 superblocks wide; max keeps
// each tile >= 4 superblocks wide. Extra bits grow the count within range.
836  for (s->tiling.log2_tile_cols = 0;
837  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
838  s->tiling.log2_tile_cols++) ;
839  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
840  max = FFMAX(0, max - 1);
841  while (max > s->tiling.log2_tile_cols) {
842  if (get_bits1(&s->gb))
843  s->tiling.log2_tile_cols++;
844  else
845  break;
846  }
847  s->tiling.log2_tile_rows = decode012(&s->gb);
848  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
849  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
850  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// One range coder per tile column, reallocated when the count changes.
851  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
852  sizeof(VP56RangeCoder) * s->tiling.tile_cols);
853  if (!s->c_b) {
854  av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
855  return AVERROR(ENOMEM);
856  }
857  }
858 
// Reset probability contexts to defaults on keyframes/error-resilient
// frames, or just the current context when resetctx == 2.
859  if (s->keyframe || s->errorres || (s->intraonly && s->resetctx == 3)) {
860  s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
861  s->prob_ctx[3].p = vp9_default_probs;
862  memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
863  sizeof(vp9_default_coef_probs));
864  memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
865  sizeof(vp9_default_coef_probs));
866  memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
867  sizeof(vp9_default_coef_probs));
868  memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
869  sizeof(vp9_default_coef_probs));
870  } else if (s->intraonly && s->resetctx == 2) {
// NOTE(review): internal line 871 elided — presumably resetting
// s->prob_ctx[c].p to vp9_default_probs; verify upstream.
872  memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
873  sizeof(vp9_default_coef_probs));
874  }
875 
876  // next 16 bits is size of the rest of the header (arith-coded)
877  size2 = get_bits(&s->gb, 16);
878  data2 = align_get_bits(&s->gb);
879  if (size2 > size - (data2 - data)) {
880  av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
881  return AVERROR_INVALIDDATA;
882  }
883  ff_vp56_init_range_decoder(&s->c, data2, size2);
884  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
885  av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
886  return AVERROR_INVALIDDATA;
887  }
888 
889  if (s->keyframe || s->intraonly) {
890  memset(s->counts.coef, 0, sizeof(s->counts.coef));
891  memset(s->counts.eob, 0, sizeof(s->counts.eob));
892  } else {
893  memset(&s->counts, 0, sizeof(s->counts));
894  }
895  // FIXME is it faster to not copy here, but do it down in the fw updates
896  // as explicit copies if the fw update is missing (and skip the copy upon
897  // fw update)?
898  s->prob.p = s->prob_ctx[c].p;
899 
900  // txfm updates
901  if (s->lossless) {
902  s->txfmmode = TX_4X4;
903  } else {
904  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
905  if (s->txfmmode == 3)
906  s->txfmmode += vp8_rac_get(&s->c);
907 
908  if (s->txfmmode == TX_SWITCHABLE) {
909  for (i = 0; i < 2; i++)
910  if (vp56_rac_get_prob_branchy(&s->c, 252))
911  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
912  for (i = 0; i < 2; i++)
913  for (j = 0; j < 2; j++)
914  if (vp56_rac_get_prob_branchy(&s->c, 252))
915  s->prob.p.tx16p[i][j] =
916  update_prob(&s->c, s->prob.p.tx16p[i][j]);
917  for (i = 0; i < 2; i++)
918  for (j = 0; j < 3; j++)
919  if (vp56_rac_get_prob_branchy(&s->c, 252))
920  s->prob.p.tx32p[i][j] =
921  update_prob(&s->c, s->prob.p.tx32p[i][j]);
922  }
923  }
924 
925  // coef updates
926  for (i = 0; i < 4; i++) {
927  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
928  if (vp8_rac_get(&s->c)) {
929  for (j = 0; j < 2; j++)
930  for (k = 0; k < 2; k++)
931  for (l = 0; l < 6; l++)
932  for (m = 0; m < 6; m++) {
933  uint8_t *p = s->prob.coef[i][j][k][l][m];
934  uint8_t *r = ref[j][k][l][m];
935  if (m >= 3 && l == 0) // dc only has 3 pt
936  break;
937  for (n = 0; n < 3; n++) {
938  if (vp56_rac_get_prob_branchy(&s->c, 252)) {
939  p[n] = update_prob(&s->c, r[n]);
940  } else {
941  p[n] = r[n];
942  }
943  }
944  p[3] = 0;
945  }
946  } else {
947  for (j = 0; j < 2; j++)
948  for (k = 0; k < 2; k++)
949  for (l = 0; l < 6; l++)
950  for (m = 0; m < 6; m++) {
951  uint8_t *p = s->prob.coef[i][j][k][l][m];
952  uint8_t *r = ref[j][k][l][m];
953  if (m > 3 && l == 0) // dc only has 3 pt
954  break;
955  memcpy(p, r, 3);
956  p[3] = 0;
957  }
958  }
959  if (s->txfmmode == i)
960  break;
961  }
962 
963  // mode updates
964  for (i = 0; i < 3; i++)
965  if (vp56_rac_get_prob_branchy(&s->c, 252))
966  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
967  if (!s->keyframe && !s->intraonly) {
968  for (i = 0; i < 7; i++)
969  for (j = 0; j < 3; j++)
970  if (vp56_rac_get_prob_branchy(&s->c, 252))
971  s->prob.p.mv_mode[i][j] =
972  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
973 
974  if (s->filtermode == FILTER_SWITCHABLE)
975  for (i = 0; i < 4; i++)
976  for (j = 0; j < 2; j++)
977  if (vp56_rac_get_prob_branchy(&s->c, 252))
978  s->prob.p.filter[i][j] =
979  update_prob(&s->c, s->prob.p.filter[i][j]);
980 
981  for (i = 0; i < 4; i++)
982  if (vp56_rac_get_prob_branchy(&s->c, 252))
983  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
984 
985  if (s->allowcompinter) {
986  s->comppredmode = vp8_rac_get(&s->c);
987  if (s->comppredmode)
988  s->comppredmode += vp8_rac_get(&s->c);
989  if (s->comppredmode == PRED_SWITCHABLE)
990  for (i = 0; i < 5; i++)
991  if (vp56_rac_get_prob_branchy(&s->c, 252))
992  s->prob.p.comp[i] =
993  update_prob(&s->c, s->prob.p.comp[i]);
994  } else {
// NOTE(review): internal line 995 elided — presumably setting
// s->comppredmode to PRED_SINGLEREF; verify upstream.
996  }
997 
998  if (s->comppredmode != PRED_COMPREF) {
999  for (i = 0; i < 5; i++) {
1000  if (vp56_rac_get_prob_branchy(&s->c, 252))
1001  s->prob.p.single_ref[i][0] =
1002  update_prob(&s->c, s->prob.p.single_ref[i][0]);
1003  if (vp56_rac_get_prob_branchy(&s->c, 252))
1004  s->prob.p.single_ref[i][1] =
1005  update_prob(&s->c, s->prob.p.single_ref[i][1]);
1006  }
1007  }
1008 
1009  if (s->comppredmode != PRED_SINGLEREF) {
1010  for (i = 0; i < 5; i++)
1011  if (vp56_rac_get_prob_branchy(&s->c, 252))
1012  s->prob.p.comp_ref[i] =
1013  update_prob(&s->c, s->prob.p.comp_ref[i]);
1014  }
1015 
1016  for (i = 0; i < 4; i++)
1017  for (j = 0; j < 9; j++)
1018  if (vp56_rac_get_prob_branchy(&s->c, 252))
1019  s->prob.p.y_mode[i][j] =
1020  update_prob(&s->c, s->prob.p.y_mode[i][j]);
1021 
1022  for (i = 0; i < 4; i++)
1023  for (j = 0; j < 4; j++)
1024  for (k = 0; k < 3; k++)
1025  if (vp56_rac_get_prob_branchy(&s->c, 252))
1026  s->prob.p.partition[3 - i][j][k] =
1027  update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1028 
1029  // mv fields don't use the update_prob subexp model for some reason
1030  for (i = 0; i < 3; i++)
1031  if (vp56_rac_get_prob_branchy(&s->c, 252))
1032  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1033 
1034  for (i = 0; i < 2; i++) {
1035  if (vp56_rac_get_prob_branchy(&s->c, 252))
1036  s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1037 
1038  for (j = 0; j < 10; j++)
1039  if (vp56_rac_get_prob_branchy(&s->c, 252))
1040  s->prob.p.mv_comp[i].classes[j] =
1041  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1042 
1043  if (vp56_rac_get_prob_branchy(&s->c, 252))
1044  s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045 
1046  for (j = 0; j < 10; j++)
1047  if (vp56_rac_get_prob_branchy(&s->c, 252))
1048  s->prob.p.mv_comp[i].bits[j] =
1049  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050  }
1051 
1052  for (i = 0; i < 2; i++) {
1053  for (j = 0; j < 2; j++)
1054  for (k = 0; k < 3; k++)
1055  if (vp56_rac_get_prob_branchy(&s->c, 252))
1056  s->prob.p.mv_comp[i].class0_fp[j][k] =
1057  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1058 
1059  for (j = 0; j < 3; j++)
1060  if (vp56_rac_get_prob_branchy(&s->c, 252))
1061  s->prob.p.mv_comp[i].fp[j] =
1062  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1063  }
1064 
// High-precision MV probs only exist when the header enabled them.
1065  if (s->highprecisionmvs) {
1066  for (i = 0; i < 2; i++) {
1067  if (vp56_rac_get_prob_branchy(&s->c, 252))
1068  s->prob.p.mv_comp[i].class0_hp =
1069  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1070 
1071  if (vp56_rac_get_prob_branchy(&s->c, 252))
1072  s->prob.p.mv_comp[i].hp =
1073  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1074  }
1075  }
1076  }
1077 
// Total bytes consumed: uncompressed header + compressed header.
1078  return (data2 - data) + size2;
1079 }
1080 
1081 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1082  VP9Context *s)
1083 {
1084  dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1085  dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1086 }
1087 
1089  VP56mv *pmv, int ref, int z, int idx, int sb)
1090 {
1091  static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1092  [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1093  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1094  [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1095  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1096  [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1097  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1098  [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1099  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1100  [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1101  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1102  [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1103  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1104  [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1105  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1106  [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1107  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1108  [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1109  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1110  [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1111  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1112  [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1113  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1114  [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1115  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1116  [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1117  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1118  };
1119  VP9Block *b = s->b;
1120  int row = s->row, col = s->col, row7 = s->row7;
1121  const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1122 #define INVALID_MV 0x80008000U
1123  uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1124  int i;
1125 
1126 #define RETURN_DIRECT_MV(mv) \
1127  do { \
1128  uint32_t m = AV_RN32A(&mv); \
1129  if (!idx) { \
1130  AV_WN32A(pmv, m); \
1131  return; \
1132  } else if (mem == INVALID_MV) { \
1133  mem = m; \
1134  } else if (m != mem) { \
1135  AV_WN32A(pmv, m); \
1136  return; \
1137  } \
1138  } while (0)
1139 
1140  if (sb >= 0) {
1141  if (sb == 2 || sb == 1) {
1142  RETURN_DIRECT_MV(b->mv[0][z]);
1143  } else if (sb == 3) {
1144  RETURN_DIRECT_MV(b->mv[2][z]);
1145  RETURN_DIRECT_MV(b->mv[1][z]);
1146  RETURN_DIRECT_MV(b->mv[0][z]);
1147  }
1148 
1149 #define RETURN_MV(mv) \
1150  do { \
1151  if (sb > 0) { \
1152  VP56mv tmp; \
1153  uint32_t m; \
1154  av_assert2(idx == 1); \
1155  av_assert2(mem != INVALID_MV); \
1156  if (mem_sub8x8 == INVALID_MV) { \
1157  clamp_mv(&tmp, &mv, s); \
1158  m = AV_RN32A(&tmp); \
1159  if (m != mem) { \
1160  AV_WN32A(pmv, m); \
1161  return; \
1162  } \
1163  mem_sub8x8 = AV_RN32A(&mv); \
1164  } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1165  clamp_mv(&tmp, &mv, s); \
1166  m = AV_RN32A(&tmp); \
1167  if (m != mem) { \
1168  AV_WN32A(pmv, m); \
1169  } else { \
1170  /* BUG I'm pretty sure this isn't the intention */ \
1171  AV_WN32A(pmv, 0); \
1172  } \
1173  return; \
1174  } \
1175  } else { \
1176  uint32_t m = AV_RN32A(&mv); \
1177  if (!idx) { \
1178  clamp_mv(pmv, &mv, s); \
1179  return; \
1180  } else if (mem == INVALID_MV) { \
1181  mem = m; \
1182  } else if (m != mem) { \
1183  clamp_mv(pmv, &mv, s); \
1184  return; \
1185  } \
1186  } \
1187  } while (0)
1188 
1189  if (row > 0) {
1190  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1191  if (mv->ref[0] == ref) {
1192  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1193  } else if (mv->ref[1] == ref) {
1194  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1195  }
1196  }
1197  if (col > s->tiling.tile_col_start) {
1198  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1199  if (mv->ref[0] == ref) {
1200  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1201  } else if (mv->ref[1] == ref) {
1202  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1203  }
1204  }
1205  i = 2;
1206  } else {
1207  i = 0;
1208  }
1209 
1210  // previously coded MVs in this neighbourhood, using same reference frame
1211  for (; i < 8; i++) {
1212  int c = p[i][0] + col, r = p[i][1] + row;
1213 
1214  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1215  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1216 
1217  if (mv->ref[0] == ref) {
1218  RETURN_MV(mv->mv[0]);
1219  } else if (mv->ref[1] == ref) {
1220  RETURN_MV(mv->mv[1]);
1221  }
1222  }
1223  }
1224 
1225  // MV at this position in previous frame, using same reference frame
1226  if (s->use_last_frame_mvs) {
1227  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1228 
1231  if (mv->ref[0] == ref) {
1232  RETURN_MV(mv->mv[0]);
1233  } else if (mv->ref[1] == ref) {
1234  RETURN_MV(mv->mv[1]);
1235  }
1236  }
1237 
1238 #define RETURN_SCALE_MV(mv, scale) \
1239  do { \
1240  if (scale) { \
1241  VP56mv mv_temp = { -mv.x, -mv.y }; \
1242  RETURN_MV(mv_temp); \
1243  } else { \
1244  RETURN_MV(mv); \
1245  } \
1246  } while (0)
1247 
1248  // previously coded MVs in this neighbourhood, using different reference frame
1249  for (i = 0; i < 8; i++) {
1250  int c = p[i][0] + col, r = p[i][1] + row;
1251 
1252  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1253  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1254 
1255  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1256  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1257  }
1258  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1259  // BUG - libvpx has this condition regardless of whether
1260  // we used the first ref MV and pre-scaling
1261  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1262  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1263  }
1264  }
1265  }
1266 
1267  // MV at this position in previous frame, using different reference frame
1268  if (s->use_last_frame_mvs) {
1269  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1270 
1271  // no need to await_progress, because we already did that above
1272  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1273  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1274  }
1275  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1276  // BUG - libvpx has this condition regardless of whether
1277  // we used the first ref MV and pre-scaling
1278  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1279  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1280  }
1281  }
1282 
1283  AV_ZERO32(pmv);
1284  clamp_mv(pmv, pmv, s);
1285 #undef INVALID_MV
1286 #undef RETURN_MV
1287 #undef RETURN_SCALE_MV
1288 }
1289 
1290 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1291 {
1292  int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1293  int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1294  s->prob.p.mv_comp[idx].classes);
1295 
1296  s->counts.mv_comp[idx].sign[sign]++;
1297  s->counts.mv_comp[idx].classes[c]++;
1298  if (c) {
1299  int m;
1300 
1301  for (n = 0, m = 0; m < c; m++) {
1302  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1303  n |= bit << m;
1304  s->counts.mv_comp[idx].bits[m][bit]++;
1305  }
1306  n <<= 3;
1307  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1308  n |= bit << 1;
1309  s->counts.mv_comp[idx].fp[bit]++;
1310  if (hp) {
1311  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1312  s->counts.mv_comp[idx].hp[bit]++;
1313  n |= bit;
1314  } else {
1315  n |= 1;
1316  // bug in libvpx - we count for bw entropy purposes even if the
1317  // bit wasn't coded
1318  s->counts.mv_comp[idx].hp[1]++;
1319  }
1320  n += 8 << c;
1321  } else {
1322  n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1323  s->counts.mv_comp[idx].class0[n]++;
1324  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1325  s->prob.p.mv_comp[idx].class0_fp[n]);
1326  s->counts.mv_comp[idx].class0_fp[n][bit]++;
1327  n = (n << 3) | (bit << 1);
1328  if (hp) {
1329  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1330  s->counts.mv_comp[idx].class0_hp[bit]++;
1331  n |= bit;
1332  } else {
1333  n |= 1;
1334  // bug in libvpx - we count for bw entropy purposes even if the
1335  // bit wasn't coded
1336  s->counts.mv_comp[idx].class0_hp[1]++;
1337  }
1338  }
1339 
1340  return sign ? -(n + 1) : (n + 1);
1341 }
1342 
1343 static void fill_mv(VP9Context *s,
1344  VP56mv *mv, int mode, int sb)
1345 {
1346  VP9Block *b = s->b;
1347 
1348  if (mode == ZEROMV) {
1349  AV_ZERO64(mv);
1350  } else {
1351  int hp;
1352 
1353  // FIXME cache this value and reuse for other subblocks
1354  find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1355  mode == NEWMV ? -1 : sb);
1356  // FIXME maybe move this code into find_ref_mvs()
1357  if ((mode == NEWMV || sb == -1) &&
1358  !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1359  if (mv[0].y & 1) {
1360  if (mv[0].y < 0)
1361  mv[0].y++;
1362  else
1363  mv[0].y--;
1364  }
1365  if (mv[0].x & 1) {
1366  if (mv[0].x < 0)
1367  mv[0].x++;
1368  else
1369  mv[0].x--;
1370  }
1371  }
1372  if (mode == NEWMV) {
1374  s->prob.p.mv_joint);
1375 
1376  s->counts.mv_joint[j]++;
1377  if (j >= MV_JOINT_V)
1378  mv[0].y += read_mv_component(s, 0, hp);
1379  if (j & 1)
1380  mv[0].x += read_mv_component(s, 1, hp);
1381  }
1382 
1383  if (b->comp) {
1384  // FIXME cache this value and reuse for other subblocks
1385  find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1386  mode == NEWMV ? -1 : sb);
1387  if ((mode == NEWMV || sb == -1) &&
1388  !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1389  if (mv[1].y & 1) {
1390  if (mv[1].y < 0)
1391  mv[1].y++;
1392  else
1393  mv[1].y--;
1394  }
1395  if (mv[1].x & 1) {
1396  if (mv[1].x < 0)
1397  mv[1].x++;
1398  else
1399  mv[1].x--;
1400  }
1401  }
1402  if (mode == NEWMV) {
1404  s->prob.p.mv_joint);
1405 
1406  s->counts.mv_joint[j]++;
1407  if (j >= MV_JOINT_V)
1408  mv[1].y += read_mv_component(s, 0, hp);
1409  if (j & 1)
1410  mv[1].x += read_mv_component(s, 1, hp);
1411  }
1412  }
1413  }
1414 }
1415 
1416 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1417  ptrdiff_t stride, int v)
1418 {
1419  switch (w) {
1420  case 1:
1421  do {
1422  *ptr = v;
1423  ptr += stride;
1424  } while (--h);
1425  break;
1426  case 2: {
1427  int v16 = v * 0x0101;
1428  do {
1429  AV_WN16A(ptr, v16);
1430  ptr += stride;
1431  } while (--h);
1432  break;
1433  }
1434  case 4: {
1435  uint32_t v32 = v * 0x01010101;
1436  do {
1437  AV_WN32A(ptr, v32);
1438  ptr += stride;
1439  } while (--h);
1440  break;
1441  }
1442  case 8: {
1443 #if HAVE_FAST_64BIT
1444  uint64_t v64 = v * 0x0101010101010101ULL;
1445  do {
1446  AV_WN64A(ptr, v64);
1447  ptr += stride;
1448  } while (--h);
1449 #else
1450  uint32_t v32 = v * 0x01010101;
1451  do {
1452  AV_WN32A(ptr, v32);
1453  AV_WN32A(ptr + 4, v32);
1454  ptr += stride;
1455  } while (--h);
1456 #endif
1457  break;
1458  }
1459  }
1460 }
1461 
1462 static void decode_mode(AVCodecContext *ctx)
1463 {
1464  static const uint8_t left_ctx[N_BS_SIZES] = {
1465  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1466  };
1467  static const uint8_t above_ctx[N_BS_SIZES] = {
1468  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1469  };
1470  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1472  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1473  };
1474  VP9Context *s = ctx->priv_data;
1475  VP9Block *b = s->b;
1476  int row = s->row, col = s->col, row7 = s->row7;
1477  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1478  int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1479  int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1480  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1481  int vref, filter_id;
1482 
1483  if (!s->segmentation.enabled) {
1484  b->seg_id = 0;
1485  } else if (s->keyframe || s->intraonly) {
1486  b->seg_id = !s->segmentation.update_map ? 0 :
1488  } else if (!s->segmentation.update_map ||
1489  (s->segmentation.temporal &&
1491  s->prob.segpred[s->above_segpred_ctx[col] +
1492  s->left_segpred_ctx[row7]]))) {
1493  if (!s->errorres && !s->segmentation.ignore_refmap) {
1494  int pred = 8, x;
1496 
1499  for (y = 0; y < h4; y++) {
1500  int idx_base = (y + row) * 8 * s->sb_cols + col;
1501  for (x = 0; x < w4; x++)
1502  pred = FFMIN(pred, refsegmap[idx_base + x]);
1503  }
1504  av_assert1(pred < 8);
1505  b->seg_id = pred;
1506  } else {
1507  b->seg_id = 0;
1508  }
1509 
1510  memset(&s->above_segpred_ctx[col], 1, w4);
1511  memset(&s->left_segpred_ctx[row7], 1, h4);
1512  } else {
1514  s->prob.seg);
1515 
1516  memset(&s->above_segpred_ctx[col], 0, w4);
1517  memset(&s->left_segpred_ctx[row7], 0, h4);
1518  }
1519  if (s->segmentation.enabled &&
1520  (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1521  setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1522  bw4, bh4, 8 * s->sb_cols, b->seg_id);
1523  }
1524 
1525  b->skip = s->segmentation.enabled &&
1526  s->segmentation.feat[b->seg_id].skip_enabled;
1527  if (!b->skip) {
1528  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1529  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1530  s->counts.skip[c][b->skip]++;
1531  }
1532 
1533  if (s->keyframe || s->intraonly) {
1534  b->intra = 1;
1535  } else if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1536  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1537  } else {
1538  int c, bit;
1539 
1540  if (have_a && have_l) {
1541  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1542  c += (c == 2);
1543  } else {
1544  c = have_a ? 2 * s->above_intra_ctx[col] :
1545  have_l ? 2 * s->left_intra_ctx[row7] : 0;
1546  }
1547  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1548  s->counts.intra[c][bit]++;
1549  b->intra = !bit;
1550  }
1551 
1552  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1553  int c;
1554  if (have_a) {
1555  if (have_l) {
1556  c = (s->above_skip_ctx[col] ? max_tx :
1557  s->above_txfm_ctx[col]) +
1558  (s->left_skip_ctx[row7] ? max_tx :
1559  s->left_txfm_ctx[row7]) > max_tx;
1560  } else {
1561  c = s->above_skip_ctx[col] ? 1 :
1562  (s->above_txfm_ctx[col] * 2 > max_tx);
1563  }
1564  } else if (have_l) {
1565  c = s->left_skip_ctx[row7] ? 1 :
1566  (s->left_txfm_ctx[row7] * 2 > max_tx);
1567  } else {
1568  c = 1;
1569  }
1570  switch (max_tx) {
1571  case TX_32X32:
1572  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1573  if (b->tx) {
1574  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1575  if (b->tx == 2)
1576  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1577  }
1578  s->counts.tx32p[c][b->tx]++;
1579  break;
1580  case TX_16X16:
1581  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1582  if (b->tx)
1583  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1584  s->counts.tx16p[c][b->tx]++;
1585  break;
1586  case TX_8X8:
1587  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1588  s->counts.tx8p[c][b->tx]++;
1589  break;
1590  case TX_4X4:
1591  b->tx = TX_4X4;
1592  break;
1593  }
1594  } else {
1595  b->tx = FFMIN(max_tx, s->txfmmode);
1596  }
1597 
1598  if (s->keyframe || s->intraonly) {
1599  uint8_t *a = &s->above_mode_ctx[col * 2];
1600  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1601 
1602  b->comp = 0;
1603  if (b->bs > BS_8x8) {
1604  // FIXME the memory storage intermediates here aren't really
1605  // necessary, they're just there to make the code slightly
1606  // simpler for now
1607  b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1608  vp9_default_kf_ymode_probs[a[0]][l[0]]);
1609  if (b->bs != BS_8x4) {
1611  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1612  l[0] = a[1] = b->mode[1];
1613  } else {
1614  l[0] = a[1] = b->mode[1] = b->mode[0];
1615  }
1616  if (b->bs != BS_4x8) {
1617  b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1618  vp9_default_kf_ymode_probs[a[0]][l[1]]);
1619  if (b->bs != BS_8x4) {
1621  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1622  l[1] = a[1] = b->mode[3];
1623  } else {
1624  l[1] = a[1] = b->mode[3] = b->mode[2];
1625  }
1626  } else {
1627  b->mode[2] = b->mode[0];
1628  l[1] = a[1] = b->mode[3] = b->mode[1];
1629  }
1630  } else {
1632  vp9_default_kf_ymode_probs[*a][*l]);
1633  b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1634  // FIXME this can probably be optimized
1635  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1636  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1637  }
1640  } else if (b->intra) {
1641  b->comp = 0;
1642  if (b->bs > BS_8x8) {
1644  s->prob.p.y_mode[0]);
1645  s->counts.y_mode[0][b->mode[0]]++;
1646  if (b->bs != BS_8x4) {
1648  s->prob.p.y_mode[0]);
1649  s->counts.y_mode[0][b->mode[1]]++;
1650  } else {
1651  b->mode[1] = b->mode[0];
1652  }
1653  if (b->bs != BS_4x8) {
1655  s->prob.p.y_mode[0]);
1656  s->counts.y_mode[0][b->mode[2]]++;
1657  if (b->bs != BS_8x4) {
1659  s->prob.p.y_mode[0]);
1660  s->counts.y_mode[0][b->mode[3]]++;
1661  } else {
1662  b->mode[3] = b->mode[2];
1663  }
1664  } else {
1665  b->mode[2] = b->mode[0];
1666  b->mode[3] = b->mode[1];
1667  }
1668  } else {
1669  static const uint8_t size_group[10] = {
1670  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1671  };
1672  int sz = size_group[b->bs];
1673 
1675  s->prob.p.y_mode[sz]);
1676  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1677  s->counts.y_mode[sz][b->mode[3]]++;
1678  }
1680  s->prob.p.uv_mode[b->mode[3]]);
1681  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1682  } else {
1683  static const uint8_t inter_mode_ctx_lut[14][14] = {
1684  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1687  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1688  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1689  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1690  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1691  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1692  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1693  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1694  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1695  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1696  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1697  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1698  };
1699 
1700  if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1701  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1702  b->comp = 0;
1703  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1704  } else {
1705  // read comp_pred flag
1706  if (s->comppredmode != PRED_SWITCHABLE) {
1707  b->comp = s->comppredmode == PRED_COMPREF;
1708  } else {
1709  int c;
1710 
1711  // FIXME add intra as ref=0xff (or -1) to make these easier?
1712  if (have_a) {
1713  if (have_l) {
1714  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1715  c = 4;
1716  } else if (s->above_comp_ctx[col]) {
1717  c = 2 + (s->left_intra_ctx[row7] ||
1718  s->left_ref_ctx[row7] == s->fixcompref);
1719  } else if (s->left_comp_ctx[row7]) {
1720  c = 2 + (s->above_intra_ctx[col] ||
1721  s->above_ref_ctx[col] == s->fixcompref);
1722  } else {
1723  c = (!s->above_intra_ctx[col] &&
1724  s->above_ref_ctx[col] == s->fixcompref) ^
1725  (!s->left_intra_ctx[row7] &&
1726  s->left_ref_ctx[row & 7] == s->fixcompref);
1727  }
1728  } else {
1729  c = s->above_comp_ctx[col] ? 3 :
1730  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1731  }
1732  } else if (have_l) {
1733  c = s->left_comp_ctx[row7] ? 3 :
1734  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1735  } else {
1736  c = 1;
1737  }
1738  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1739  s->counts.comp[c][b->comp]++;
1740  }
1741 
1742  // read actual references
1743  // FIXME probably cache a few variables here to prevent repetitive
1744  // memory accesses below
1745  if (b->comp) /* two references */ {
1746  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1747 
1748  b->ref[fix_idx] = s->fixcompref;
1749  // FIXME can this codeblob be replaced by some sort of LUT?
1750  if (have_a) {
1751  if (have_l) {
1752  if (s->above_intra_ctx[col]) {
1753  if (s->left_intra_ctx[row7]) {
1754  c = 2;
1755  } else {
1756  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1757  }
1758  } else if (s->left_intra_ctx[row7]) {
1759  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1760  } else {
1761  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1762 
1763  if (refl == refa && refa == s->varcompref[1]) {
1764  c = 0;
1765  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1766  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1767  (refl == s->fixcompref && refa == s->varcompref[0])) {
1768  c = 4;
1769  } else {
1770  c = (refa == refl) ? 3 : 1;
1771  }
1772  } else if (!s->left_comp_ctx[row7]) {
1773  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1774  c = 1;
1775  } else {
1776  c = (refl == s->varcompref[1] &&
1777  refa != s->varcompref[1]) ? 2 : 4;
1778  }
1779  } else if (!s->above_comp_ctx[col]) {
1780  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1781  c = 1;
1782  } else {
1783  c = (refa == s->varcompref[1] &&
1784  refl != s->varcompref[1]) ? 2 : 4;
1785  }
1786  } else {
1787  c = (refl == refa) ? 4 : 2;
1788  }
1789  }
1790  } else {
1791  if (s->above_intra_ctx[col]) {
1792  c = 2;
1793  } else if (s->above_comp_ctx[col]) {
1794  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1795  } else {
1796  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1797  }
1798  }
1799  } else if (have_l) {
1800  if (s->left_intra_ctx[row7]) {
1801  c = 2;
1802  } else if (s->left_comp_ctx[row7]) {
1803  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1804  } else {
1805  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1806  }
1807  } else {
1808  c = 2;
1809  }
1810  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1811  b->ref[var_idx] = s->varcompref[bit];
1812  s->counts.comp_ref[c][bit]++;
1813  } else /* single reference */ {
1814  int bit, c;
1815 
1816  if (have_a && !s->above_intra_ctx[col]) {
1817  if (have_l && !s->left_intra_ctx[row7]) {
1818  if (s->left_comp_ctx[row7]) {
1819  if (s->above_comp_ctx[col]) {
1820  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1821  !s->above_ref_ctx[col]);
1822  } else {
1823  c = (3 * !s->above_ref_ctx[col]) +
1824  (!s->fixcompref || !s->left_ref_ctx[row7]);
1825  }
1826  } else if (s->above_comp_ctx[col]) {
1827  c = (3 * !s->left_ref_ctx[row7]) +
1828  (!s->fixcompref || !s->above_ref_ctx[col]);
1829  } else {
1830  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1831  }
1832  } else if (s->above_intra_ctx[col]) {
1833  c = 2;
1834  } else if (s->above_comp_ctx[col]) {
1835  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1836  } else {
1837  c = 4 * (!s->above_ref_ctx[col]);
1838  }
1839  } else if (have_l && !s->left_intra_ctx[row7]) {
1840  if (s->left_intra_ctx[row7]) {
1841  c = 2;
1842  } else if (s->left_comp_ctx[row7]) {
1843  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1844  } else {
1845  c = 4 * (!s->left_ref_ctx[row7]);
1846  }
1847  } else {
1848  c = 2;
1849  }
1850  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1851  s->counts.single_ref[c][0][bit]++;
1852  if (!bit) {
1853  b->ref[0] = 0;
1854  } else {
1855  // FIXME can this codeblob be replaced by some sort of LUT?
1856  if (have_a) {
1857  if (have_l) {
1858  if (s->left_intra_ctx[row7]) {
1859  if (s->above_intra_ctx[col]) {
1860  c = 2;
1861  } else if (s->above_comp_ctx[col]) {
1862  c = 1 + 2 * (s->fixcompref == 1 ||
1863  s->above_ref_ctx[col] == 1);
1864  } else if (!s->above_ref_ctx[col]) {
1865  c = 3;
1866  } else {
1867  c = 4 * (s->above_ref_ctx[col] == 1);
1868  }
1869  } else if (s->above_intra_ctx[col]) {
1870  if (s->left_intra_ctx[row7]) {
1871  c = 2;
1872  } else if (s->left_comp_ctx[row7]) {
1873  c = 1 + 2 * (s->fixcompref == 1 ||
1874  s->left_ref_ctx[row7] == 1);
1875  } else if (!s->left_ref_ctx[row7]) {
1876  c = 3;
1877  } else {
1878  c = 4 * (s->left_ref_ctx[row7] == 1);
1879  }
1880  } else if (s->above_comp_ctx[col]) {
1881  if (s->left_comp_ctx[row7]) {
1882  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1883  c = 3 * (s->fixcompref == 1 ||
1884  s->left_ref_ctx[row7] == 1);
1885  } else {
1886  c = 2;
1887  }
1888  } else if (!s->left_ref_ctx[row7]) {
1889  c = 1 + 2 * (s->fixcompref == 1 ||
1890  s->above_ref_ctx[col] == 1);
1891  } else {
1892  c = 3 * (s->left_ref_ctx[row7] == 1) +
1893  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1894  }
1895  } else if (s->left_comp_ctx[row7]) {
1896  if (!s->above_ref_ctx[col]) {
1897  c = 1 + 2 * (s->fixcompref == 1 ||
1898  s->left_ref_ctx[row7] == 1);
1899  } else {
1900  c = 3 * (s->above_ref_ctx[col] == 1) +
1901  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1902  }
1903  } else if (!s->above_ref_ctx[col]) {
1904  if (!s->left_ref_ctx[row7]) {
1905  c = 3;
1906  } else {
1907  c = 4 * (s->left_ref_ctx[row7] == 1);
1908  }
1909  } else if (!s->left_ref_ctx[row7]) {
1910  c = 4 * (s->above_ref_ctx[col] == 1);
1911  } else {
1912  c = 2 * (s->left_ref_ctx[row7] == 1) +
1913  2 * (s->above_ref_ctx[col] == 1);
1914  }
1915  } else {
1916  if (s->above_intra_ctx[col] ||
1917  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1918  c = 2;
1919  } else if (s->above_comp_ctx[col]) {
1920  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1921  } else {
1922  c = 4 * (s->above_ref_ctx[col] == 1);
1923  }
1924  }
1925  } else if (have_l) {
1926  if (s->left_intra_ctx[row7] ||
1927  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1928  c = 2;
1929  } else if (s->left_comp_ctx[row7]) {
1930  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1931  } else {
1932  c = 4 * (s->left_ref_ctx[row7] == 1);
1933  }
1934  } else {
1935  c = 2;
1936  }
1937  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1938  s->counts.single_ref[c][1][bit]++;
1939  b->ref[0] = 1 + bit;
1940  }
1941  }
1942  }
1943 
1944  if (b->bs <= BS_8x8) {
1945  if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].skip_enabled) {
1946  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1947  } else {
1948  static const uint8_t off[10] = {
1949  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1950  };
1951 
1952  // FIXME this needs to use the LUT tables from find_ref_mvs
1953  // because not all are -1,0/0,-1
1954  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1955  [s->left_mode_ctx[row7 + off[b->bs]]];
1956 
1958  s->prob.p.mv_mode[c]);
1959  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1960  s->counts.mv_mode[c][b->mode[0] - 10]++;
1961  }
1962  }
1963 
1964  if (s->filtermode == FILTER_SWITCHABLE) {
1965  int c;
1966 
1967  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1968  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1969  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1970  s->left_filter_ctx[row7] : 3;
1971  } else {
1972  c = s->above_filter_ctx[col];
1973  }
1974  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1975  c = s->left_filter_ctx[row7];
1976  } else {
1977  c = 3;
1978  }
1979 
1980  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1981  s->prob.p.filter[c]);
1982  s->counts.filter[c][filter_id]++;
1983  b->filter = vp9_filter_lut[filter_id];
1984  } else {
1985  b->filter = s->filtermode;
1986  }
1987 
1988  if (b->bs > BS_8x8) {
1989  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1990 
1992  s->prob.p.mv_mode[c]);
1993  s->counts.mv_mode[c][b->mode[0] - 10]++;
1994  fill_mv(s, b->mv[0], b->mode[0], 0);
1995 
1996  if (b->bs != BS_8x4) {
1998  s->prob.p.mv_mode[c]);
1999  s->counts.mv_mode[c][b->mode[1] - 10]++;
2000  fill_mv(s, b->mv[1], b->mode[1], 1);
2001  } else {
2002  b->mode[1] = b->mode[0];
2003  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2004  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2005  }
2006 
2007  if (b->bs != BS_4x8) {
2009  s->prob.p.mv_mode[c]);
2010  s->counts.mv_mode[c][b->mode[2] - 10]++;
2011  fill_mv(s, b->mv[2], b->mode[2], 2);
2012 
2013  if (b->bs != BS_8x4) {
2015  s->prob.p.mv_mode[c]);
2016  s->counts.mv_mode[c][b->mode[3] - 10]++;
2017  fill_mv(s, b->mv[3], b->mode[3], 3);
2018  } else {
2019  b->mode[3] = b->mode[2];
2020  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2021  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2022  }
2023  } else {
2024  b->mode[2] = b->mode[0];
2025  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2026  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2027  b->mode[3] = b->mode[1];
2028  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2029  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2030  }
2031  } else {
2032  fill_mv(s, b->mv[0], b->mode[0], -1);
2033  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2034  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2035  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2036  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2037  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2038  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2039  }
2040 
2041  vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
2042  }
2043 
2044 #if HAVE_FAST_64BIT
2045 #define SPLAT_CTX(var, val, n) \
2046  switch (n) { \
2047  case 1: var = val; break; \
2048  case 2: AV_WN16A(&var, val * 0x0101); break; \
2049  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2050  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2051  case 16: { \
2052  uint64_t v64 = val * 0x0101010101010101ULL; \
2053  AV_WN64A( &var, v64); \
2054  AV_WN64A(&((uint8_t *) &var)[8], v64); \
2055  break; \
2056  } \
2057  }
2058 #else
2059 #define SPLAT_CTX(var, val, n) \
2060  switch (n) { \
2061  case 1: var = val; break; \
2062  case 2: AV_WN16A(&var, val * 0x0101); break; \
2063  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2064  case 8: { \
2065  uint32_t v32 = val * 0x01010101; \
2066  AV_WN32A( &var, v32); \
2067  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2068  break; \
2069  } \
2070  case 16: { \
2071  uint32_t v32 = val * 0x01010101; \
2072  AV_WN32A( &var, v32); \
2073  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2074  AV_WN32A(&((uint8_t *) &var)[8], v32); \
2075  AV_WN32A(&((uint8_t *) &var)[12], v32); \
2076  break; \
2077  } \
2078  }
2079 #endif
2080 
2081  switch (bwh_tab[1][b->bs][0]) {
2082 #define SET_CTXS(dir, off, n) \
2083  do { \
2084  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2085  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2086  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2087  if (!s->keyframe && !s->intraonly) { \
2088  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2089  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2090  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2091  if (!b->intra) { \
2092  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2093  if (s->filtermode == FILTER_SWITCHABLE) { \
2094  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2095  } \
2096  } \
2097  } \
2098  } while (0)
2099  case 1: SET_CTXS(above, col, 1); break;
2100  case 2: SET_CTXS(above, col, 2); break;
2101  case 4: SET_CTXS(above, col, 4); break;
2102  case 8: SET_CTXS(above, col, 8); break;
2103  }
2104  switch (bwh_tab[1][b->bs][1]) {
2105  case 1: SET_CTXS(left, row7, 1); break;
2106  case 2: SET_CTXS(left, row7, 2); break;
2107  case 4: SET_CTXS(left, row7, 4); break;
2108  case 8: SET_CTXS(left, row7, 8); break;
2109  }
2110 #undef SPLAT_CTX
2111 #undef SET_CTXS
2112 
2113  if (!s->keyframe && !s->intraonly) {
2114  if (b->bs > BS_8x8) {
2115  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2116 
2117  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2118  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2119  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2120  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2121  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2122  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2123  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2124  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2125  } else {
2126  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2127 
2128  for (n = 0; n < w4 * 2; n++) {
2129  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2130  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2131  }
2132  for (n = 0; n < h4 * 2; n++) {
2133  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2134  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2135  }
2136  }
2137  }
2138 
2139  // FIXME kinda ugly
2140  for (y = 0; y < h4; y++) {
2141  int x, o = (row + y) * s->sb_cols * 8 + col;
2142  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2143 
2144  if (b->intra) {
2145  for (x = 0; x < w4; x++) {
2146  mv[x].ref[0] =
2147  mv[x].ref[1] = -1;
2148  }
2149  } else if (b->comp) {
2150  for (x = 0; x < w4; x++) {
2151  mv[x].ref[0] = b->ref[0];
2152  mv[x].ref[1] = b->ref[1];
2153  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2154  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2155  }
2156  } else {
2157  for (x = 0; x < w4; x++) {
2158  mv[x].ref[0] = b->ref[0];
2159  mv[x].ref[1] = -1;
2160  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2161  }
2162  }
2163  }
2164 }
2165 
// FIXME merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of one transform block with the VP56 range
 * coder, following the VP9 coefficient token tree.
 *
 * @param c               range coder to read bits from
 * @param coef            output coefficient array; for 8bpp each entry is an
 *                        int16_t, for high bitdepth a 32-bit value is written
 *                        per position via AV_WN32A (see STORE_COEF below)
 * @param n_coeffs        total number of coefficients for this transform size
 * @param is_tx32x32      nonzero for 32x32 transforms, whose dequantized
 *                        values are halved after scaling
 * @param is8bitsperpixel nonzero for 8-bit content
 * @param bpp             bits per sample; only consulted for the extra cat6
 *                        magnitude bits of 10/12-bit streams
 * @param cnt             token counters [band][nnz ctx][token class], bumped
 *                        for later backward probability adaptation
 * @param eob             end-of-block counters [band][nnz ctx][bit]
 * @param p               probability tables [band][nnz ctx][token]; entries
 *                        3..10 are lazily filled from the Pareto model
 * @param nnz             initial nonzero context from above/left neighbors
 * @param scan            scan order (position of the i-th coded coefficient)
 * @param nb              per-scan-position pair of already-decoded neighbor
 *                        positions used to derive the next nonzero context
 * @param band_counts     number of scan positions in each probability band
 * @param qmul            dequant factors: [0] for DC (i == 0), [1] for AC
 * @return the number of decoded coefficients (end-of-block position)
 */
static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    // Token magnitude cache indexed by scan position; nb[] is expected to
    // reference only positions decoded earlier in scan order, so entries are
    // written before they are read (NOTE(review): relies on vp9_scans_nb
    // construction — confirm against vp9data.c).
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            // next context = rounded average of the two neighbor magnitudes
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            // after a zero token, the next position has no EOB check
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            // fill in p[3-10] (model fill) - only once per frame for each pos
            if (!tp[3])
                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);

            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    val = 5 + vp56_rac_get_prob(c, 159);
                } else {
                    val = 7 + (vp56_rac_get_prob(c, 165) << 1);
                    val += vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        val = 11 + (vp56_rac_get_prob(c, 173) << 2);
                        val += (vp56_rac_get_prob(c, 148) << 1);
                        val += vp56_rac_get_prob(c, 140);
                    } else {
                        val = 19 + (vp56_rac_get_prob(c, 176) << 3);
                        val += (vp56_rac_get_prob(c, 155) << 2);
                        val += (vp56_rac_get_prob(c, 140) << 1);
                        val += vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    val = 35 + (vp56_rac_get_prob(c, 180) << 4);
                    val += (vp56_rac_get_prob(c, 157) << 3);
                    val += (vp56_rac_get_prob(c, 141) << 2);
                    val += (vp56_rac_get_prob(c, 134) << 1);
                    val += vp56_rac_get_prob(c, 130);
                } else {
                    // cat6: explicit magnitude bits; high-bitdepth streams
                    // carry 2 (10-bit) or 4 (12-bit) additional top bits
                    val = 67;
                    if (!is8bitsperpixel) {
                        if (bpp == 12) {
                            val += vp56_rac_get_prob(c, 255) << 17;
                            val += vp56_rac_get_prob(c, 255) << 16;
                        }
                        val += (vp56_rac_get_prob(c, 255) << 15);
                        val += (vp56_rac_get_prob(c, 255) << 14);
                    }
                    val += (vp56_rac_get_prob(c, 254) << 13);
                    val += (vp56_rac_get_prob(c, 254) << 12);
                    val += (vp56_rac_get_prob(c, 254) << 11);
                    val += (vp56_rac_get_prob(c, 252) << 10);
                    val += (vp56_rac_get_prob(c, 249) << 9);
                    val += (vp56_rac_get_prob(c, 243) << 8);
                    val += (vp56_rac_get_prob(c, 230) << 7);
                    val += (vp56_rac_get_prob(c, 196) << 6);
                    val += (vp56_rac_get_prob(c, 177) << 5);
                    val += (vp56_rac_get_prob(c, 153) << 4);
                    val += (vp56_rac_get_prob(c, 140) << 3);
                    val += (vp56_rac_get_prob(c, 133) << 2);
                    val += (vp56_rac_get_prob(c, 130) << 1);
                    val += vp56_rac_get_prob(c, 129);
                }
            }
        }
// 8bpp stores a plain int16_t; high bitdepth writes a 32-bit value since
// dequantized magnitudes can exceed the int16_t range.
#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        c[i] = v; \
    } else { \
        AV_WN32A(&c[i * 2], v); \
    } \
} while (0)
        if (!--band_left)
            band_left = band_counts[++band];
        // sign bit, then dequantize (DC uses qmul[0], AC qmul[1]);
        // 32x32 transforms halve the result
        if (is_tx32x32)
            STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
        else
            STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}
2290 
2291 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2292  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2293  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2294  const int16_t (*nb)[2], const int16_t *band_counts,
2295  const int16_t *qmul)
2296 {
2297  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2298  nnz, scan, nb, band_counts, qmul);
2299 }
2300 
2301 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2302  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2303  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2304  const int16_t (*nb)[2], const int16_t *band_counts,
2305  const int16_t *qmul)
2306 {
2307  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2308  nnz, scan, nb, band_counts, qmul);
2309 }
2310 
2311 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2312  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2313  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2314  const int16_t (*nb)[2], const int16_t *band_counts,
2315  const int16_t *qmul)
2316 {
2317  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2318  nnz, scan, nb, band_counts, qmul);
2319 }
2320 
2321 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2322  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2323  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2324  const int16_t (*nb)[2], const int16_t *band_counts,
2325  const int16_t *qmul)
2326 {
2327  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2328  nnz, scan, nb, band_counts, qmul);
2329 }
2330 
/**
 * Decode all luma and chroma coefficients for the current block, updating the
 * above/left nonzero-context lines as it goes.
 *
 * The nnz context lines store one entry per 4x4 unit. For transforms larger
 * than 4x4 the per-unit entries are first MERGEd into a single flag at the
 * transform's top-left unit, used as context, and after decoding SPLATed back
 * across all units covered by the transform.
 *
 * @param ctx             codec context (VP9Context in priv_data; current
 *                        block/row/col are taken from there)
 * @param is8bitsperpixel nonzero for 8-bit content
 * @return nonzero if any transform block in this block has coefficients
 */
static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
    // block dimensions in 4x4 units
    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
    // clip to the visible frame area for blocks overhanging the edge
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;
    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
    // lossless uses the WHT tables stored after the 4 regular tx sizes
    int tx = 4 * s->lossless + b->tx;
    const int16_t * const *yscans = vp9_scans[tx];
    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
    // coefficients per probability band, per transform size (TX_4X4..TX_32X32)
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4, 3, 16 - 13 },
        { 1, 2, 3, 4, 11, 64 - 21 },
        { 1, 2, 3, 4, 11, 256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
    int bytesperpixel = is8bitsperpixel ? 1 : 2;
    int total_coeff = 0;

// collapse `step` per-4x4 context entries into one boolean at the first entry
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)

// decode one luma transform block per (x, y) grid position; `v` selects the
// 32x32 decoder variant (empty or `32`), `mode_index` picks the sub-block
// prediction mode that determines the intra transform type
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
                   c, e, p, a[x] + l[y], yscans[txtp], \
                   ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
            if (step >= 4) { \
                AV_WN16A(&s->eob[n], res); \
            } else { \
                s->eob[n] = res; \
            } \
        } \
    }

// fan the merged context flag back out over the `step` 4x4 units it covers;
// `cond` is true when the transform is fully inside the frame, allowing an
// aligned multi-byte splat instead of a bounded memset
#define SPLAT(la, end, step, cond) \
    if (step == 2) { \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
        if (cond) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        } \
    } else /* step == 8 */ { \
        if (cond) { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            } else { \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n], v32); \
                    AV_WN32A(&la[n + 4], v32); \
                } \
            } \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
        } \
    }
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)

    /* y tokens */
    switch (b->tx) {
    case TX_4X4:
        // sub-8x8 blocks have one mode per 4x4 sub-block
        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
        break;
    case TX_8X8:
        MERGE_CTX(2, AV_RN16A);
        DECODE_Y_COEF_LOOP(2, 0,);
        SPLAT_CTX(2);
        break;
    case TX_16X16:
        MERGE_CTX(4, AV_RN32A);
        DECODE_Y_COEF_LOOP(4, 0,);
        SPLAT_CTX(4);
        break;
    case TX_32X32:
        MERGE_CTX(8, AV_RN64A);
        DECODE_Y_COEF_LOOP(8, 0, 32);
        SPLAT_CTX(8);
        break;
    }

// same as DECODE_Y_COEF_LOOP but for a chroma plane `pl`; chroma always
// uses the DCT_DCT scan
#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
                   16 * step * step, c, e, p, a[x] + l[y], \
                   uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
            if (step >= 4) { \
                AV_WN16A(&s->uveob[pl][n], res); \
            } else { \
                s->uveob[pl][n] = res; \
            } \
        } \
    }

    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
    // scale block extent to chroma resolution
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    h4 >>= s->ss_v;
    end_y >>= s->ss_v;
    for (pl = 0; pl < 2; pl++) {
        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
        l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
        switch (b->uvtx) {
        case TX_4X4:
            DECODE_UV_COEF_LOOP(1,);
            break;
        case TX_8X8:
            MERGE_CTX(2, AV_RN16A);
            DECODE_UV_COEF_LOOP(2,);
            SPLAT_CTX(2);
            break;
        case TX_16X16:
            MERGE_CTX(4, AV_RN32A);
            DECODE_UV_COEF_LOOP(4,);
            SPLAT_CTX(4);
            break;
        case TX_32X32:
            MERGE_CTX(8, AV_RN64A);
            DECODE_UV_COEF_LOOP(8, 32);
            SPLAT_CTX(8);
            break;
        }
    }

    return total_coeff;
}
2497 
2499 {
2500  return decode_coeffs(ctx, 1);
2501 }
2502 
2504 {
2505  return decode_coeffs(ctx, 0);
2506 }
2507 
2509  uint8_t *dst_edge, ptrdiff_t stride_edge,
2510  uint8_t *dst_inner, ptrdiff_t stride_inner,
2511  uint8_t *l, int col, int x, int w,
2512  int row, int y, enum TxfmMode tx,
2513  int p, int ss_h, int ss_v, int bytesperpixel)
2514 {
2515  int have_top = row > 0 || y > 0;
2516  int have_left = col > s->tiling.tile_col_start || x > 0;
2517  int have_right = x < w - 1;
2518  int bpp = s->bpp;
2519  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2520  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2521  { DC_127_PRED, VERT_PRED } },
2522  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2523  { HOR_PRED, HOR_PRED } },
2524  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2525  { LEFT_DC_PRED, DC_PRED } },
2535  { DC_127_PRED, VERT_LEFT_PRED } },
2536  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2537  { HOR_UP_PRED, HOR_UP_PRED } },
2538  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2539  { HOR_PRED, TM_VP8_PRED } },
2540  };
2541  static const struct {
2542  uint8_t needs_left:1;
2543  uint8_t needs_top:1;
2544  uint8_t needs_topleft:1;
2545  uint8_t needs_topright:1;
2546  uint8_t invert_left:1;
2547  } edges[N_INTRA_PRED_MODES] = {
2548  [VERT_PRED] = { .needs_top = 1 },
2549  [HOR_PRED] = { .needs_left = 1 },
2550  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2551  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2552  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2553  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2554  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2555  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2556  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2557  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2558  [LEFT_DC_PRED] = { .needs_left = 1 },
2559  [TOP_DC_PRED] = { .needs_top = 1 },
2560  [DC_128_PRED] = { 0 },
2561  [DC_127_PRED] = { 0 },
2562  [DC_129_PRED] = { 0 }
2563  };
2564 
2565  av_assert2(mode >= 0 && mode < 10);
2566  mode = mode_conv[mode][have_left][have_top];
2567  if (edges[mode].needs_top) {
2568  uint8_t *top, *topleft;
2569  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2570  int n_px_need_tr = 0;
2571 
2572  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2573  n_px_need_tr = 4;
2574 
2575  // if top of sb64-row, use s->intra_pred_data[] instead of
2576  // dst[-stride] for intra prediction (it contains pre- instead of
2577  // post-loopfilter data)
2578  if (have_top) {
2579  top = !(row & 7) && !y ?
2580  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2581  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2582  if (have_left)
2583  topleft = !(row & 7) && !y ?
2584  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2585  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2586  &dst_inner[-stride_inner];
2587  }
2588 
2589  if (have_top &&
2590  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2591  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2592  n_px_need + n_px_need_tr <= n_px_have) {
2593  *a = top;
2594  } else {
2595  if (have_top) {
2596  if (n_px_need <= n_px_have) {
2597  memcpy(*a, top, n_px_need * bytesperpixel);
2598  } else {
2599 #define memset_bpp(c, i1, v, i2, num) do { \
2600  if (bytesperpixel == 1) { \
2601  memset(&(c)[(i1)], (v)[(i2)], (num)); \
2602  } else { \
2603  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2604  for (n = 0; n < (num); n++) { \
2605  AV_WN16A(&(c)[((i1) + n) * 2], val); \
2606  } \
2607  } \
2608 } while (0)
2609  memcpy(*a, top, n_px_have * bytesperpixel);
2610  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2611  }
2612  } else {
2613 #define memset_val(c, val, num) do { \
2614  if (bytesperpixel == 1) { \
2615  memset((c), (val), (num)); \
2616  } else { \
2617  int n; \
2618  for (n = 0; n < (num); n++) { \
2619  AV_WN16A(&(c)[n * 2], (val)); \
2620  } \
2621  } \
2622 } while (0)
2623  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2624  }
2625  if (edges[mode].needs_topleft) {
2626  if (have_left && have_top) {
2627 #define assign_bpp(c, i1, v, i2) do { \
2628  if (bytesperpixel == 1) { \
2629  (c)[(i1)] = (v)[(i2)]; \
2630  } else { \
2631  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2632  } \
2633 } while (0)
2634  assign_bpp(*a, -1, topleft, -1);
2635  } else {
2636 #define assign_val(c, i, v) do { \
2637  if (bytesperpixel == 1) { \
2638  (c)[(i)] = (v); \
2639  } else { \
2640  AV_WN16A(&(c)[(i) * 2], (v)); \
2641  } \
2642 } while (0)
2643  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2644  }
2645  }
2646  if (tx == TX_4X4 && edges[mode].needs_topright) {
2647  if (have_top && have_right &&
2648  n_px_need + n_px_need_tr <= n_px_have) {
2649  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2650  } else {
2651  memset_bpp(*a, 4, *a, 3, 4);
2652  }
2653  }
2654  }
2655  }
2656  if (edges[mode].needs_left) {
2657  if (have_left) {
2658  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2659  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2660  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2661 
2662  if (edges[mode].invert_left) {
2663  if (n_px_need <= n_px_have) {
2664  for (i = 0; i < n_px_need; i++)
2665  assign_bpp(l, i, &dst[i * stride], -1);
2666  } else {
2667  for (i = 0; i < n_px_have; i++)
2668  assign_bpp(l, i, &dst[i * stride], -1);
2669  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2670  }
2671  } else {
2672  if (n_px_need <= n_px_have) {
2673  for (i = 0; i < n_px_need; i++)
2674  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2675  } else {
2676  for (i = 0; i < n_px_have; i++)
2677  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2678  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2679  }
2680  }
2681  } else {
2682  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2683  }
2684  }
2685 
2686  return mode;
2687 }
2688 
/**
 * Intra reconstruction of the current block: per transform block, run intra
 * prediction and, when coefficients are present, add the inverse transform.
 *
 * Prediction reads from the CUR_FRAME pointers (ptr_r) because neighbors must
 * be pre-loopfilter, while the result is written through the s->dst working
 * pointers; check_intra_mode() fixes up the edge pixels and may downgrade the
 * prediction mode near frame borders.
 *
 * @param ctx           codec context (VP9Context in priv_data)
 * @param y_off         byte offset of this block in the luma plane
 * @param uv_off        byte offset of this block in the chroma planes
 * @param bytesperpixel 1 for 8-bit content, 2 for high bitdepth
 */
static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
                                         ptrdiff_t uv_off, int bytesperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    // w4/h4: block size in 4x4 units; step1d: transform size in 4x4 units;
    // step: coefficient-buffer advance per transform block
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    // lossless uses the WHT tables stored after the 4 regular tx sizes
    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
    int uvstep1d = 1 << b->uvtx, p;
    uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
    // scratch rows for the (possibly edge-fixed) top and left predictors
    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
    LOCAL_ALIGNED_32(uint8_t, l, [64]);

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
             ptr_r += 4 * step1d * bytesperpixel, n += step) {
            // sub-8x8 blocks carry one mode per 4x4 sub-block
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            uint8_t *a = &a_buf[32];
            enum TxfmType txtp = vp9_intra_txfm_type[mode];
            // eob entries are 16 bit for transforms larger than 8x8
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

            mode = check_intra_mode(s, mode, &a, ptr_r,
                                    s->frames[CUR_FRAME].tf.f->linesize[0],
                                    ptr, s->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
                                           s->block + 16 * n * bytesperpixel, eob);
        }
        dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
        dst += 4 * step1d * s->y_stride;
    }

    // U/V
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    end_y >>= s->ss_v;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = s->dst[1 + p];
        dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
                // chroma uses a single mode and always DCT_DCT
                int mode = b->uvmode;
                uint8_t *a = &a_buf[32];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                mode = check_intra_mode(s, mode, &a, ptr_r,
                                        s->frames[CUR_FRAME].tf.f->linesize[1],
                                        ptr, s->uv_stride, l, col, x, w4, row, y,
                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                    s->uvblock[p] + 16 * n * bytesperpixel, eob);
            }
            dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
            dst += 4 * uvstep1d * s->uv_stride;
        }
    }
}
2758 
2759 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2760 {
2761  intra_recon(ctx, y_off, uv_off, 1);
2762 }
2763 
2764 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2765 {
2766  intra_recon(ctx, y_off, uv_off, 2);
2767 }
2768 
2770  uint8_t *dst, ptrdiff_t dst_stride,
2771  const uint8_t *ref, ptrdiff_t ref_stride,
2773  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2774  int px, int py, int pw, int ph,
2775  int bw, int bh, int w, int h, int bytesperpixel,
2776  const uint16_t *scale, const uint8_t *step)
2777 {
2778 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2779  int mx, my;
2780  int refbw_m1, refbh_m1;
2781  int th;
2782  VP56mv mv;
2783 
2784  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2785  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2786  // BUG libvpx seems to scale the two components separately. This introduces
2787  // rounding errors but we have to reproduce them to be exactly compatible
2788  // with the output from libvpx...
2789  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2790  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2791 
2792  y = my >> 4;
2793  x = mx >> 4;
2794  ref += y * ref_stride + x * bytesperpixel;
2795  mx &= 15;
2796  my &= 15;
2797  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2798  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2799  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2800  // we use +7 because the last 7 pixels of each sbrow can be changed in
2801  // the longest loopfilter of the next sbrow
2802  th = (y + refbh_m1 + 4 + 7) >> 6;
2803  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2804  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2806  ref - 3 * ref_stride - 3 * bytesperpixel,
2807  288, ref_stride,
2808  refbw_m1 + 8, refbh_m1 + 8,
2809  x - 3, y - 3, w, h);
2810  ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2811  ref_stride = 288;
2812  }
2813  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2814 }
2815 
2817  uint8_t *dst_u, uint8_t *dst_v,
2818  ptrdiff_t dst_stride,
2819  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2820  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2822  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2823  int px, int py, int pw, int ph,
2824  int bw, int bh, int w, int h, int bytesperpixel,
2825  const uint16_t *scale, const uint8_t *step)
2826 {
2827  int mx, my;
2828  int refbw_m1, refbh_m1;
2829  int th;
2830  VP56mv mv;
2831 
2832  if (s->ss_h) {
2833  // BUG https://code.google.com/p/webm/issues/detail?id=820
2834  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2835  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2836  } else {
2837  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2838  mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2839  }
2840  if (s->ss_v) {
2841  // BUG https://code.google.com/p/webm/issues/detail?id=820
2842  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2843  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2844  } else {
2845  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2846  my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2847  }
2848 #undef scale_mv
2849  y = my >> 4;
2850  x = mx >> 4;
2851  ref_u += y * src_stride_u + x * bytesperpixel;
2852  ref_v += y * src_stride_v + x * bytesperpixel;
2853  mx &= 15;
2854  my &= 15;
2855  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2856  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2857  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2858  // we use +7 because the last 7 pixels of each sbrow can be changed in
2859  // the longest loopfilter of the next sbrow
2860  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2861  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2862  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2864  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2865  288, src_stride_u,
2866  refbw_m1 + 8, refbh_m1 + 8,
2867  x - 3, y - 3, w, h);
2868  ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2869  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2870 
2872  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2873  288, src_stride_v,
2874  refbw_m1 + 8, refbh_m1 + 8,
2875  x - 3, y - 3, w, h);
2876  ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2877  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2878  } else {
2879  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2880  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2881  }
2882 }
2883 
// Map the generic MC template onto the *scaled* motion-compensation helpers:
// the "s" prefix on s->dsp.s##mc selects the scaled dsp functions, and the
// per-reference mvscale/mvstep tables are forwarded. `b` and `bytesperpixel`
// are expected to be in scope at every expansion site inside the template.
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
// instantiate inter_pred_scaled_8bpp() from the shared template
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
// instantiate inter_pred_scaled_16bpp() from the shared template
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
2908 
2910  uint8_t *dst, ptrdiff_t dst_stride,
2911  const uint8_t *ref, ptrdiff_t ref_stride,
2913  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2914  int bw, int bh, int w, int h, int bytesperpixel)
2915 {
2916  int mx = mv->x, my = mv->y, th;
2917 
2918  y += my >> 3;
2919  x += mx >> 3;
2920  ref += y * ref_stride + x * bytesperpixel;
2921  mx &= 7;
2922  my &= 7;
2923  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2924  // we use +7 because the last 7 pixels of each sbrow can be changed in
2925  // the longest loopfilter of the next sbrow
2926  th = (y + bh + 4 * !!my + 7) >> 6;
2927  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2928  if (x < !!mx * 3 || y < !!my * 3 ||
2929  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2931  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2932  160, ref_stride,
2933  bw + !!mx * 7, bh + !!my * 7,
2934  x - !!mx * 3, y - !!my * 3, w, h);
2935  ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2936  ref_stride = 160;
2937  }
2938  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2939 }
2940 
2942  uint8_t *dst_u, uint8_t *dst_v,
2943  ptrdiff_t dst_stride,
2944  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2945  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2947  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2948  int bw, int bh, int w, int h, int bytesperpixel)
2949 {
2950  int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2951 
2952  y += my >> 4;
2953  x += mx >> 4;
2954  ref_u += y * src_stride_u + x * bytesperpixel;
2955  ref_v += y * src_stride_v + x * bytesperpixel;
2956  mx &= 15;
2957  my &= 15;
2958  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2959  // we use +7 because the last 7 pixels of each sbrow can be changed in
2960  // the longest loopfilter of the next sbrow
2961  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2962  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2963  if (x < !!mx * 3 || y < !!my * 3 ||
2964  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2966  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2967  160, src_stride_u,
2968  bw + !!mx * 7, bh + !!my * 7,
2969  x - !!mx * 3, y - !!my * 3, w, h);
2970  ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2971  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2972 
2974  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2975  160, src_stride_v,
2976  bw + !!mx * 7, bh + !!my * 7,
2977  x - !!mx * 3, y - !!my * 3, w, h);
2978  ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2979  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2980  } else {
2981  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2982  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2983  }
2984 }
2985 
// Map the template's generic mc_{luma,chroma}_dir() calls onto the unscaled
// motion-compensation helpers above; the scaled variants were instantiated
// from the same template earlier in the file.
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h, bytesperpixel)
// Instantiate inter_pred_8bpp() / inter_pred_16bpp() from the shared template.
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
// NOTE(review): the names below do not match the mc_luma_dir/mc_chroma_dir
// macros defined above (extra "_dir"); #undef of an undefined name is a
// harmless no-op, but mc_luma_dir/mc_chroma_dir stay defined past this
// point — confirm against upstream whether this is an intentional name or
// a typo.
#undef mc_luma_dir_dir
#undef mc_chroma_dir_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
3008 
/**
 * Reconstruct one inter-coded block: run motion compensation (the scaled
 * path if any reference needs mv scaling, otherwise the unscaled path),
 * then, unless the block is coded as skip, add the inverse-transformed
 * residual for luma and both chroma planes.
 *
 * @param bytesperpixel 1 for 8bpp content, 2 for 10/12bpp; always a
 *                      compile-time constant via the _8bpp/_16bpp wrappers.
 */
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;

    // any nonzero mvscale entry for a used reference forces the scaled MC path
    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(ctx);
        } else {
            inter_pred_scaled_16bpp(ctx);
        }
    } else {
        if (bytesperpixel == 1) {
            inter_pred_8bpp(ctx);
        } else {
            inter_pred_16bpp(ctx);
        }
    }
    if (!b->skip) {
        /* mostly copied intra_recon() */

        // w4/h4: block size in 4px units; step1d: tx size in 4px units;
        // step: number of 4x4 units covered by one transform block
        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        // clip to the visible frame area so we don't transform beyond the edge
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        // lossless uses the WHT variants stored at itxfm_add[4..]
        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = s->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                // eob for tx > 8x8 is stored as 16-bit, smaller as 8-bit
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
                                                  s->block + 16 * n * bytesperpixel, eob);
            }
            dst += 4 * s->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= s->ss_h;
        end_y >>= s->ss_v;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                        s->uvblock[p] + 16 * n * bytesperpixel, eob);
                }
                dst += 4 * uvstep1d * s->uv_stride;
            }
        }
    }
}
3074 
3076 {
3077  inter_recon(ctx, 1);
3078 }
3079 
3081 {
3082  inter_recon(ctx, 2);
3083 }
3084 
/**
 * Record, for one coded block, which 4px column/row edges need loopfiltering
 * and at which filter width, into the per-superblock edge masks.
 *
 * @param mask       mask[0]=column edges, mask[1]=row edges; each is
 *                   [8 rows][4 widths: 16px, 8px, 4px, inner-4px]
 * @param ss_h,ss_v  chroma subsampling flags (0 for the luma call)
 * @param row_and_7  block position within the 8x8-unit superblock
 * @param col_and_7  likewise, horizontally
 * @param w, h       block size in 8px (luma) / subsampled units
 * @param col_end    nonzero width remainder at the right frame edge (UV only)
 * @param row_end    nonzero height remainder at the bottom frame edge (UV only)
 * @param tx         transform size used by this block
 * @param skip_inter set for inter blocks coded as skip (only block-boundary
 *                   edges get filtered, no inner transform edges)
 */
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    // bit patterns selecting which 4px edges coincide with 32px boundaries
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && (ss_v | ss_h)) {
        if (h == ss_v) {
            if (row_and_7 & 1)
                return;           // odd half of a vertically-merged pair
            if (!row_end)
                h += 1;           // extend to cover the merged pair
        }
        if (w == ss_h) {
            if (col_and_7 & 1)
                return;           // odd half of a horizontally-merged pair
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        // every 4px edge inside the block is a transform edge
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            // for odd lines, if the odd col is not being filtered,
            // skip odd row also:
            // .---. <-- a
            // |   |
            // |___| <-- b
            // ^   ^
            // c   d
            //
            // if a/c are even row/col and b/d are odd, and d is skipped,
            // e.g. right edge of size-66x66.webm, then skip b also (bug)
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }
            if (!ss_h)
                mask[0][y][3] |= m_col;
            if (!ss_v) {
                if (ss_h && (col_end & 1))
                    mask[1][y][3] |= (t << (w - 1)) - t;
                else
                    mask[1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int l2 = tx + ss_h - 1, step1d;
            // keep only the edges on this tx size's grid
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    mask[0][y][0] |= m_row_16;
                    mask[0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    mask[0][y][mask_id] |= m_row;
            }

            l2 = tx + ss_v - 1;
            step1d = 1 << l2;
            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                // same off-edge guard, vertically: last row drops to 8px wide
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    mask[1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    mask[1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    mask[1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            // skipped inter block: only the block's outer edges get filtered
            int mask_id;

            mask_id = (tx == TX_8X8) || (h == ss_v);
            mask[1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (w == ss_h);
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= t;
        } else {
            // skipped inter block with 4x4 tx: outer edges at 4/8px width
            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][2] |= t4;
                mask[0][y][1] |= t8;
            }
            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
        }
    }
}
3206 
/**
 * Decode and reconstruct one block (leaf of the superblock partition tree).
 *
 * In pass < 2 this parses mode info and coefficients from the bitstream; in
 * pass 1 (first of a two-pass frame-threaded decode) it then returns without
 * reconstructing, leaving reconstruction to pass 2, which replays the stored
 * per-block data. Also fills the loopfilter level/edge masks for this block.
 *
 * @param row, col   block position in 8px units
 * @param lflvl      per-superblock loopfilter state to fill
 * @param yoff,uvoff byte offsets of this block into the frame planes
 * @param bl         partition-tree level, @param bp partition at that level
 */
static void decode_b(AVCodecContext *ctx, int row, int col,
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                     enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    enum BlockSize bs = bl * 3 + bp;
    int bytesperpixel = s->bytesperpixel;
    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->frames[CUR_FRAME].tf.f;

    s->row = row;
    s->row7 = row & 7;
    s->col = col;
    s->col7 = col & 7;
    // clamp range for motion vectors of this block (1/8pel units)
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
    if (s->pass < 2) {
        // bitstream parsing: modes, then coefficients
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(ctx);
        // chroma tx is one step smaller when the block is only one tx wide/high
        // in the subsampled dimension
        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                           (s->ss_v && h4 * 2 == (1 << b->tx)));

        if (!b->skip) {
            int has_coeffs;

            if (bytesperpixel == 1) {
                has_coeffs = decode_coeffs_8bpp(ctx);
            } else {
                has_coeffs = decode_coeffs_16bpp(ctx);
            }
            // inter block with no coded coefficients: treat as skip for
            // the loopfilter / context purposes
            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
                b->skip = 1;
                memset(&s->above_skip_ctx[col], 1, w4);
                memset(&s->left_skip_ctx[s->row7], 1, h4);
            }
        } else {
            int row7 = s->row7;

// Zero n bytes of a nonzero-coefficient context variable (n known at
// expansion time, so the right-width store is picked by the switch).
#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
// Zero the y and uv above/left contexts for a block of n 8px units;
// uv offsets/sizes halve when the corresponding subsampling flag is set.
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)

        switch (w4) {
        case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
        case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
        case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
        case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
        }
        switch (h4) {
        case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
        case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
        case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
        case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
        }
        }
        if (s->pass == 1) {
            // pass 1 only parses; advance the per-block storage pointers
            // and leave reconstruction to pass 2
            s->b++;
            s->block += w4 * h4 * 64 * bytesperpixel;
            s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->eob += 4 * w4 * h4;
            s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
            s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);

            return;
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // makes it possible to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = (col + w4) * 4 > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        // reconstruct into the temporary buffer, copy back below
        s->dst[0] = s->tmp_y;
        s->y_stride = 128;
    } else {
        s->dst[0] = f->data[0] + yoff;
        s->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        s->dst[1] = s->tmp_uv[0];
        s->dst[2] = s->tmp_uv[1];
        s->uv_stride = 128;
    } else {
        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
        s->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        if (s->bpp > 8) {
            intra_recon_16bpp(ctx, yoff, uvoff);
        } else {
            intra_recon_8bpp(ctx, yoff, uvoff);
        }
    } else {
        if (s->bpp > 8) {
            inter_recon_16bpp(ctx);
        } else {
            inter_recon_8bpp(ctx);
        }
    }
    if (emu[0]) {
        // copy the visible part of the temporary luma buffer back into the
        // frame, in power-of-two wide strips (widest dsp copy first)
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
                                         s->tmp_y + o, 128, h, 0, 0);
                o += bw * bytesperpixel;
            }
        }
    }
    if (emu[1]) {
        // same copy-back for both chroma planes
        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;

        for (n = s->ss_h; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
                                         s->tmp_uv[0] + o, 128, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
                                         s->tmp_uv[1] + o, 128, h, 0, 0);
                o += bw * bytesperpixel;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->filter.level &&
        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                    [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        if (s->ss_h || s->ss_v)
            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                       b->uvtx, skip_inter);

        // lazily fill the limit/mblim LUT entries for this filter level
        if (!s->filter.lim_lut[lvl]) {
            int sharp = s->filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter.lim_lut[lvl] = limit;
            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    if (s->pass == 2) {
        // advance the per-block storage pointers consumed during replay
        s->b++;
        s->block += w4 * h4 * 64 * bytesperpixel;
        s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->eob += 4 * w4 * h4;
        s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
        s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
    }
}
3405 
/**
 * Recursively decode the partition tree of one superblock region by reading
 * partition symbols from the range coder and descending into decode_b() at
 * the leaves. Near the right/bottom frame edge only the partition choices
 * that stay inside the frame are coded, hence the reduced reads there.
 */
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    // partition probability context from the above/left partition splits
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;  // half block size at this level, in 8px units
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        // lowest level: any partition value is a leaf
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            // fully inside the frame: the full partition symbol is coded
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom half is off-frame: only H vs SPLIT is coded
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        // right half is off-frame: only V vs SPLIT is coded
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        // both halves off-frame: split is implied, nothing coded
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
3484 
/**
 * Pass-2 counterpart of decode_sb(): walk the same superblock partition
 * tree, but take the partition structure from the per-block data stored by
 * pass 1 (s->b->bl / s->b->bp) instead of reading the range coder.
 */
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl;  // half block size at this level, in 8px units
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        // stored block was coded at this level: leaf (plus second half for
        // H/V partitions when it lies inside the frame)
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        // stored block is deeper: recurse into the sub-quadrants that exist
        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
3533 
/**
 * Apply the loopfilter to all vertical (between-columns) edges of one plane
 * of a superblock, driven by the per-superblock column edge masks.
 *
 * @param col   superblock column (0 suppresses filtering the frame's left edge)
 * @param lvl   8x8 grid of per-block filter levels
 * @param mask  column edge masks: [8 rows][4 widths: 16, 8, 4, inner-4]
 * @param dst   top-left of this plane's superblock, @param ls its stride
 */
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    // filter edges between columns (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        // hmask1/hmask2: masks of the two 8px rows handled per iteration
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
            if (col || x > 1) {  // never filter the frame's left edge
                if (hm1 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            // both rows want the 16px filter: do both at once
                            av_assert2(l[8 << ss_v] == L);
                            s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // pack the second row's level/limits into the high byte
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls, E, I, H);
                    }
                } else if (hm2 & x) {
                    // only the second 8px row has an edge here
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls, ls, E, I, H);
                }
            }
            if (ss_h) {
                if (x & 0xAA)
                    l += 2;
            } else {
                // inner 4px edges (mask index 3) exist only without h subsampling
                if (hm13 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (hm23 & x) {
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    }
                } else if (hm23 & x) {
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
                }
                l++;
            }
        }
    }
}
3608 
/**
 * Apply the loopfilter to all horizontal (between-rows) edges of one plane
 * of a superblock, driven by the per-superblock row edge masks.
 *
 * @param row   superblock row (0 suppresses filtering the frame's top edge)
 * @param lvl   8x8 grid of per-block filter levels
 * @param mask  row edge masks: [8 rows][4 widths: 16, 8, 4, inner-4]
 * @param dst   top-left of this plane's superblock, @param ls its stride
 */
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
        uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two adjacent 8px columns are handled per iteration (hence the
        // double shift of x and the 16px pointer step)
        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
            if (row || y) {  // never filter the frame's top edge
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << (1 + ss_h))) {
                            // both columns want the 16px filter: one wide call
                            av_assert2(l[1 + ss_h] == L);
                            s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
                        }
                    } else if (vm & (x << (1 + ss_h))) {
                        // pack the second column's level/limits into the high byte
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
                                               [!!(vmask[1] & (x << (1 + ss_h)))]
                                               [1](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls, E, I, H);
                    }
                } else if (vm & (x << (1 + ss_h))) {
                    // only the second 8px column has an edge here
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
                                        [1](ptr + 8 * bytesperpixel, ls, E, I, H);
                }
            }
            if (!ss_v) {
                // inner 4px edges (mask index 3) exist only without v subsampling
                if (vm3 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vm3 & (x << (1 + ss_h))) {
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
                    }
                } else if (vm3 & (x << (1 + ss_h))) {
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
                }
            }
        }
        if (ss_v) {
            if (y & 1)
                lvl += 16;
        } else {
            lvl += 8;
        }
    }
}
3685 
3686 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3687  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3688 {
3689  VP9Context *s = ctx->priv_data;
3690  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3691  uint8_t *dst = f->data[0] + yoff;
3692  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3693  uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3694  int p;
3695 
3696  // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3697  // if you think of them as acting on a 8x8 block max, we can interleave
3698  // each v/h within the single x loop, but that only works if we work on
3699  // 8 pixel blocks, and we won't always do that (we want at least 16px
3700  // to use SSE2 optimizations, perhaps 32 for AVX2)
3701 
3702  filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3703  filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3704 
3705  for (p = 0; p < 2; p++) {
3706  dst = f->data[1 + p] + uvoff;
3707  filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3708  filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
3709  }
3710 }
3711 
/**
 * Compute the pixel range [start, end) covered by tile number idx when the
 * frame's n superblock columns/rows are split into 2^log2_n tiles.
 * Superblock indices are clamped to n, then converted to 8px units.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;  // superblock units -> 8px units
    *end   = sb_end   << 3;
}
3719 
3720 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3721  int max_count, int update_factor)
3722 {
3723  unsigned ct = ct0 + ct1, p2, p1;
3724 
3725  if (!ct)
3726  return;
3727 
3728  p1 = *p;
3729  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3730  p2 = av_clip(p2, 1, 255);
3731  ct = FFMIN(ct, max_count);
3732  update_factor = FASTDIV(update_factor * ct, max_count);
3733 
3734  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3735  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3736 }
3737 
/**
 * Backward probability adaptation: after a frame is decoded, fold the symbol
 * counts gathered in s->counts into the frame context's probabilities via
 * adapt_prob(). Intra-only/key frames adapt only the coefficient, skip and
 * tx-size probabilities; everything else is inter-frame only.
 */
static void adapt_probs(VP9Context *s)
{
    int i, j, k, l, m;
    prob_context *p = &s->prob_ctx[s->framectxid].p;
    // reduced update factor right after a keyframe / intra-only frame
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // dc only has 3 pt
                            break;

                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
                    }

    if (s->keyframe || s->intraonly) {
        // key/intra-only frames carry their own mode probabilities; just
        // carry over the unadapted skip/tx tables and stop here
        memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
        return;
    }

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    }

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);
    }

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
        }
    }

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            // tree: none vs rest, then h vs (v|split), then v vs split
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
        }
    }

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);
        }
    }

    // inter modes
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);
    }

    // mv joints
    {
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);
    }

    // mv components
    for (i = 0; i < 2; i++) {
        uint8_t *pp;
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        // class tree: each node's probability is "this symbol" vs the sum
        // of everything still reachable below it
        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        sum -= c[1];
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        sum -= c[2] + c[3];
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        sum -= c[4] + c[5];
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        sum -= c[6];
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        // hp bits are only coded (and thus only adapted) with high-precision mvs
        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);
        }
    }

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        // sum of all mode counts except index 2 (= c[DC_PRED])
        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }

    // uv intra modes (same tree as y, conditioned on the y mode)
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }
}
3959 
/*
 * Release the per-frame scratch buffers owned by the decoder context:
 * the intra-prediction backup rows, the block-struct array and the
 * coefficient/eob buffer.  av_freep() frees and NULLs each pointer, so
 * calling this repeatedly is safe.
 * NOTE(review): only intra_pred_data[0] is freed; presumably planes [1]
 * and [2] point into the same allocation — confirm at the alloc site.
 */
3960 static void free_buffers(VP9Context *s)
3961 {
3962  av_freep(&s->intra_pred_data[0]);
3963  av_freep(&s->b_base);
3964  av_freep(&s->block_base);
3965 }
3966 
3968 {
3969  VP9Context *s = ctx->priv_data;
3970  int i;
3971 
3972  for (i = 0; i < 3; i++) {
3973  if (s->frames[i].tf.f->data[0])
3974  vp9_unref_frame(ctx, &s->frames[i]);
3975  av_frame_free(&s->frames[i].tf.f);
3976  }
3977  for (i = 0; i < 8; i++) {
3978  if (s->refs[i].f->data[0])
3979  ff_thread_release_buffer(ctx, &s->refs[i]);
3980  av_frame_free(&s->refs[i].f);
3981  if (s->next_refs[i].f->data[0])
3982  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3983  av_frame_free(&s->next_refs[i].f);
3984  }
3985  free_buffers(s);
3986  av_freep(&s->c_b);
3987  s->c_b_size = 0;
3988 
3989  return 0;
3990 }
3991 
3992 
/**
 * Codec .decode entry point: parse one VP9 packet and output at most one
 * frame.  Handles "show existing frame" packets (header returns 0),
 * rotates the segmentation-map / mv-pair reference frames, decodes all
 * tiles (optionally in two passes when a second pass is needed), runs the
 * loop filter one superblock row at a time and finally rotates the 8
 * reference-frame slots.
 *
 * NOTE(review): several original source lines were lost when this file
 * was extracted from HTML (the Doxygen line numbers embedded below jump
 * over them).  Each gap is marked inline; the code as shown is therefore
 * syntactically incomplete at those points.
 */
3993 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3994  int *got_frame, AVPacket *pkt)
3995 {
3996  const uint8_t *data = pkt->data;
3997  int size = pkt->size;
3998  VP9Context *s = ctx->priv_data;
3999  int res, tile_row, tile_col, i, ref, row, col;
4000  int retain_segmap_ref = s->frames[REF_FRAME_SEGMAP].segmentation_map &&
/* NOTE(review): original line 4001 (the second half of this initializer)
 * was lost in extraction; presumably
 * "(!s->segmentation.enabled || !s->segmentation.update_map);" — confirm
 * against upstream FFmpeg. */
4002  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4003  AVFrame *f;
4004  int bytesperpixel;
4005 
4006  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
4007  return res;
4008  } else if (res == 0) {
     /* res == 0: "show existing frame" — re-emit reference frame `ref`
      * without decoding anything. */
4009  if (!s->refs[ref].f->data[0]) {
4010  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4011  return AVERROR_INVALIDDATA;
4012  }
4013  if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4014  return res;
4015  ((AVFrame *)frame)->pkt_pts = pkt->pts;
4016  ((AVFrame *)frame)->pkt_dts = pkt->dts;
     /* carry the current reference set forward unchanged */
4017  for (i = 0; i < 8; i++) {
4018  if (s->next_refs[i].f->data[0])
4019  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4020  if (s->refs[i].f->data[0] &&
4021  (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
4022  return res;
4023  }
4024  *got_frame = 1;
4025  return pkt->size;
4026  }
4027  data += res;
4028  size -= res;
4029 
     /* rotate the frame kept for segmentation-map prediction */
4030  if (!retain_segmap_ref || s->keyframe || s->intraonly) {
4031  if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
/* NOTE(review): original line 4032 lost in extraction; presumably
 * "vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);" — confirm. */
4033  if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4034  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4035  return res;
4036  }
4037  if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
/* NOTE(review): original line 4038 lost in extraction; presumably
 * "vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);" — confirm. */
4039  if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4040  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4041  return res;
4042  if (s->frames[CUR_FRAME].tf.f->data[0])
4043  vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4044  if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4045  return res;
4046  f = s->frames[CUR_FRAME].tf.f;
4047  f->key_frame = s->keyframe;
/* NOTE(review): original line 4048 lost in extraction; presumably sets
 * f->pict_type from keyframe/intraonly — confirm against upstream. */
4049  ls_y = f->linesize[0];
4050  ls_uv =f->linesize[1];
4051 
4052  // ref frame setup
4053  for (i = 0; i < 8; i++) {
4054  if (s->next_refs[i].f->data[0])
4055  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4056  if (s->refreshrefmask & (1 << i)) {
4057  res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4058  } else if (s->refs[i].f->data[0]) {
4059  res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4060  }
4061  if (res < 0)
4062  return res;
4063  }
4064 
4065  // main tile decode loop
4066  bytesperpixel = s->bytesperpixel;
     /* reset the "above" context rows before the first superblock row */
4067  memset(s->above_partition_ctx, 0, s->cols);
4068  memset(s->above_skip_ctx, 0, s->cols);
4069  if (s->keyframe || s->intraonly) {
4070  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4071  } else {
4072  memset(s->above_mode_ctx, NEARESTMV, s->cols);
4073  }
4074  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4075  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4076  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4077  memset(s->above_segpred_ctx, 0, s->cols);
4078  s->pass = s->frames[CUR_FRAME].uses_2pass =
/* NOTE(review): original line 4079 (right-hand side of this assignment,
 * deciding whether a 2-pass decode is used) lost in extraction — confirm. */
4080  if ((res = update_block_buffers(ctx)) < 0) {
4081  av_log(ctx, AV_LOG_ERROR,
4082  "Failed to allocate block buffers\n");
4083  return res;
4084  }
     /* in parallel mode the frame context is saved before tile decoding */
4085  if (s->refreshctx && s->parallelmode) {
4086  int j, k, l, m;
4087 
4088  for (i = 0; i < 4; i++) {
4089  for (j = 0; j < 2; j++)
4090  for (k = 0; k < 2; k++)
4091  for (l = 0; l < 6; l++)
4092  for (m = 0; m < 6; m++)
4093  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4094  s->prob.coef[i][j][k][l][m], 3);
4095  if (s->txfmmode == i)
4096  break;
4097  }
4098  s->prob_ctx[s->framectxid].p = s->prob.p;
/* NOTE(review): original line 4099 lost in extraction; presumably
 * "ff_thread_finish_setup(ctx);" — confirm. */
4100  } else if (!s->refreshctx) {
/* NOTE(review): original line 4101 lost in extraction; presumably
 * "ff_thread_finish_setup(ctx);" — confirm. */
4102  }
4103 
4104  do {
4105  yoff = uvoff = 0;
4106  s->b = s->b_base;
4107  s->block = s->block_base;
4108  s->uvblock[0] = s->uvblock_base[0];
4109  s->uvblock[1] = s->uvblock_base[1];
4110  s->eob = s->eob_base;
4111  s->uveob[0] = s->uveob_base[0];
4112  s->uveob[1] = s->uveob_base[1];
4113 
4114  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
/* NOTE(review): original line 4115 lost in extraction; presumably
 * "set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end," —
 * the line below is its continuation. */
4116  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4117  if (s->pass != 2) {
     /* pass 0/1: split the packet into per-tile range decoders; all
      * tiles except the last are preceded by a 32-bit size field */
4118  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4119  int64_t tile_size;
4120 
4121  if (tile_col == s->tiling.tile_cols - 1 &&
4122  tile_row == s->tiling.tile_rows - 1) {
4123  tile_size = size;
4124  } else {
4125  tile_size = AV_RB32(data);
4126  data += 4;
4127  size -= 4;
4128  }
4129  if (tile_size > size) {
4130  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4131  return AVERROR_INVALIDDATA;
4132  }
4133  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4134  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4135  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4136  return AVERROR_INVALIDDATA;
4137  }
4138  data += tile_size;
4139  size -= tile_size;
4140  }
4141  }
4142 
4143  for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4144  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4145  struct VP9Filter *lflvl_ptr = s->lflvl;
4146  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4147 
4148  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
/* NOTE(review): original line 4149 lost in extraction; presumably
 * "set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end," —
 * the line below is its continuation. */
4150  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
4151 
4152  if (s->pass != 2) {
     /* reset the "left" context for this tile column */
4153  memset(s->left_partition_ctx, 0, 8);
4154  memset(s->left_skip_ctx, 0, 8);
4155  if (s->keyframe || s->intraonly) {
4156  memset(s->left_mode_ctx, DC_PRED, 16);
4157  } else {
4158  memset(s->left_mode_ctx, NEARESTMV, 8);
4159  }
4160  memset(s->left_y_nnz_ctx, 0, 16);
4161  memset(s->left_uv_nnz_ctx, 0, 32);
4162  memset(s->left_segpred_ctx, 0, 8);
4163 
4164  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4165  }
4166 
4167  for (col = s->tiling.tile_col_start;
4168  col < s->tiling.tile_col_end;
4169  col += 8, yoff2 += 64 * bytesperpixel,
4170  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4171  // FIXME integrate with lf code (i.e. zero after each
4172  // use, similar to invtxfm coefficients, or similar)
4173  if (s->pass != 1) {
4174  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4175  }
4176 
4177  if (s->pass == 2) {
4178  decode_sb_mem(ctx, row, col, lflvl_ptr,
4179  yoff2, uvoff2, BL_64X64);
4180  } else {
4181  decode_sb(ctx, row, col, lflvl_ptr,
4182  yoff2, uvoff2, BL_64X64);
4183  }
4184  }
4185  if (s->pass != 2) {
4186  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4187  }
4188  }
4189 
4190  if (s->pass == 1) {
4191  continue;
4192  }
4193 
4194  // backup pre-loopfilter reconstruction data for intra
4195  // prediction of next row of sb64s
4196  if (row + 8 < s->rows) {
4197  memcpy(s->intra_pred_data[0],
4198  f->data[0] + yoff + 63 * ls_y,
4199  8 * s->cols * bytesperpixel);
4200  memcpy(s->intra_pred_data[1],
4201  f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4202  8 * s->cols * bytesperpixel >> s->ss_h);
4203  memcpy(s->intra_pred_data[2],
4204  f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4205  8 * s->cols * bytesperpixel >> s->ss_h);
4206  }
4207 
4208  // loopfilter one row
4209  if (s->filter.level) {
4210  yoff2 = yoff;
4211  uvoff2 = uvoff;
4212  lflvl_ptr = s->lflvl;
4213  for (col = 0; col < s->cols;
4214  col += 8, yoff2 += 64 * bytesperpixel,
4215  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4216  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4217  }
4218  }
4219 
4220  // FIXME maybe we can make this more finegrained by running the
4221  // loopfilter per-block instead of after each sbrow
4222  // In fact that would also make intra pred left preparation easier?
4223  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4224  }
4225  }
4226 
4227  if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4228  adapt_probs(s);
/* NOTE(review): original line 4229 lost in extraction; presumably
 * "ff_thread_finish_setup(ctx);" — confirm. */
4230  }
4231  } while (s->pass++ == 1);
4232  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4233 
4234  // ref frame setup
4235  for (i = 0; i < 8; i++) {
4236  if (s->refs[i].f->data[0])
4237  ff_thread_release_buffer(ctx, &s->refs[i]);
4238  ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4239  }
4240 
4241  if (!s->invisible) {
4242  if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4243  return res;
4244  *got_frame = 1;
4245  }
4246 
4247  return pkt->size;
4248 }
4249 
4251 {
4252  VP9Context *s = ctx->priv_data;
4253  int i;
4254 
4255  for (i = 0; i < 3; i++)
4256  vp9_unref_frame(ctx, &s->frames[i]);
4257  for (i = 0; i < 8; i++)
4258  ff_thread_release_buffer(ctx, &s->refs[i]);
4259 }
4260 
4261 static int init_frames(AVCodecContext *ctx)
4262 {
4263  VP9Context *s = ctx->priv_data;
4264  int i;
4265 
4266  for (i = 0; i < 3; i++) {
4267  s->frames[i].tf.f = av_frame_alloc();
4268  if (!s->frames[i].tf.f) {
4269  vp9_decode_free(ctx);
4270  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4271  return AVERROR(ENOMEM);
4272  }
4273  }
4274  for (i = 0; i < 8; i++) {
4275  s->refs[i].f = av_frame_alloc();
4276  s->next_refs[i].f = av_frame_alloc();
4277  if (!s->refs[i].f || !s->next_refs[i].f) {
4278  vp9_decode_free(ctx);
4279  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4280  return AVERROR(ENOMEM);
4281  }
4282  }
4283 
4284  return 0;
4285 }
4286 
4288 {
4289  VP9Context *s = ctx->priv_data;
4290 
4291  ctx->internal->allocate_progress = 1;
4292  s->last_bpp = 0;
4293  s->filter.sharpness = -1;
4294 
4295  return init_frames(ctx);
4296 }
4297 
4299 {
4300  return init_frames(avctx);
4301 }
4302 
4304 {
4305  int i, res;
4306  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4307 
4308  // detect size changes in other threads
4309  if (s->intra_pred_data[0] &&
4310  (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4311  free_buffers(s);
4312  }
4313 
4314  for (i = 0; i < 3; i++) {
4315  if (s->frames[i].tf.f->data[0])
4316  vp9_unref_frame(dst, &s->frames[i]);
4317  if (ssrc->frames[i].tf.f->data[0]) {
4318  if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4319  return res;
4320  }
4321  }
4322  for (i = 0; i < 8; i++) {
4323  if (s->refs[i].f->data[0])
4324  ff_thread_release_buffer(dst, &s->refs[i]);
4325  if (ssrc->next_refs[i].f->data[0]) {
4326  if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4327  return res;
4328  }
4329  }
4330 
4331  s->invisible = ssrc->invisible;
4332  s->keyframe = ssrc->keyframe;
4333  s->intraonly = ssrc->intraonly;
4334  s->ss_v = ssrc->ss_v;
4335  s->ss_h = ssrc->ss_h;
4336  s->segmentation.enabled = ssrc->segmentation.enabled;
4337  s->segmentation.update_map = ssrc->segmentation.update_map;
4338  s->bytesperpixel = ssrc->bytesperpixel;
4339  s->bpp = ssrc->bpp;
4340  s->bpp_index = ssrc->bpp_index;
4341  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4342  memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4343  if (ssrc->segmentation.enabled) {
4344  memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4345  sizeof(s->segmentation.feat));
4346  }
4347 
4348  return 0;
4349 }
4350 
/* Table of the four VP9 bitstream profiles exposed via AVCodec.profiles;
 * terminated by the FF_PROFILE_UNKNOWN sentinel. */
4351 static const AVProfile profiles[] = {
4352  { FF_PROFILE_VP9_0, "Profile 0" },
4353  { FF_PROFILE_VP9_1, "Profile 1" },
4354  { FF_PROFILE_VP9_2, "Profile 2" },
4355  { FF_PROFILE_VP9_3, "Profile 3" },
4356  { FF_PROFILE_UNKNOWN },
4357 };
4358 
4360  .name = "vp9",
4361  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4362  .type = AVMEDIA_TYPE_VIDEO,
4363  .id = AV_CODEC_ID_VP9,
4364  .priv_data_size = sizeof(VP9Context),
4365  .init = vp9_decode_init,
4366  .close = vp9_decode_free,
4368  .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4372  .profiles = NULL_IF_CONFIG_SMALL(profiles),
4373 };
also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B
Definition: pixfmt.h:519
ThreadFrame tf
Definition: vp9.c:74
BlockPartition
Definition: vp9data.h:29