/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dsp.h"
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"

#define VP9_SYNCCODE 0x498342

struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};

typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;

typedef struct VP9Context {
    VP9SharedContext s;

    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block *b_base, *b;
    int pass;
    int row, row7, col, col7;
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;

    uint8_t ss_h, ss_v;
    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
    uint8_t last_keyframe;
    // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
    // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
    // and are therefore per-stream. pix_fmt represents the value in the header
    // of the currently processed frame.
    int w, h;
    enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
    unsigned sb_cols, sb_rows, rows, cols;
    ThreadFrame next_refs[8];

    struct {
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter_lut;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
    uint16_t mvscale[3][2];
    uint8_t mvstep[3][2];
} VP9Context;

static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
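
/* A worked lookup into the table above (a sketch; the BS_* ordering is
 * assumed from the mv_ref_blk_off[] initializer further down): for a
 * 16x8 block,
 *   bwh_tab[0][BS_16x8] = { 4, 2 }   // width/height in 4x4-block units
 *   bwh_tab[1][BS_16x8] = { 2, 1 }   // width/height in 8x8-block units
 * decode_mode() below uses the [1] plane for its w4/h4 loop bounds and the
 * [0] plane when filling the above/left mode context arrays. */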

static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}

static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        goto fail;
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    if (ctx->hwaccel) {
        const AVHWAccel *hwaccel = ctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_unref_frame(ctx, f);
    return AVERROR(ENOMEM);
}
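
/* Layout of the f->extradata buffer allocated above: sz = 64 * sb_cols *
 * sb_rows is one byte per 8x8 block of the frame, so the buffer holds the
 * per-block segmentation map first, followed by sz struct VP9mvrefPair
 * entries starting at f->extradata->data + sz. */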

static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        goto fail;
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_unref_frame(ctx, dst);
    return AVERROR(ENOMEM);
}

static int update_size(AVCodecContext *ctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = ctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, res, cols, rows;

    av_assert0(w > 0 && h > 0);

    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((res = ff_set_dimensions(ctx, w, h)) < 0)
            return res;

        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        res = ff_thread_get_format(ctx, pix_fmts);
        if (res < 0)
            return res;

        ctx->pix_fmt = res;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,          64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,          64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,          64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,          16);
    assign(s->above_mode_ctx,      uint8_t *,          16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],       16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,          16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,          16);
    assign(s->above_partition_ctx, uint8_t *,          8);
    assign(s->above_skip_ctx,      uint8_t *,          8);
    assign(s->above_txfm_ctx,      uint8_t *,          8);
    assign(s->above_segpred_ctx,   uint8_t *,          8);
    assign(s->above_intra_ctx,     uint8_t *,          8);
    assign(s->above_comp_ctx,      uint8_t *,          8);
    assign(s->above_ref_ctx,       uint8_t *,          8);
    assign(s->above_filter_ctx,    uint8_t *,          8);
    assign(s->lflvl,               struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    if (s->bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->bpp);
        s->last_bpp = s->bpp;
    }

    return 0;
}
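
/* Accounting for the single arena carved up by assign() above (a sketch),
 * per superblock column: 3 * 64 * bytesperpixel bytes of intra edge pixels
 * (192 * bytesperpixel), 16 + 16 + 2 * 16 bytes of nnz/mode context plus
 * eight 8-byte context rows (128 bytes total), one struct VP9Filter, and
 * 16 above_mv_ctx entries -- exactly the terms in the av_malloc() size. */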

static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}

// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
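
/* Example: with n == 4, the bits 0101 followed by a set sign bit decode as
 * -5, while the same magnitude bits followed by a clear sign bit give +5. */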

static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
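
/* A worked decode through update_prob() (a sketch): if the first branch bit
 * is 0, d comes from 4 bits, say d = 3, so inv_map_table[3] = 46. For an
 * old probability p = 64 (<= 128), the result is
 *   1 + inv_recenter_nonneg(46, 63) = 1 + 63 + (46 >> 1) = 87,
 * i.e. even table values recenter upwards from p and odd ones downwards,
 * while d <= 19 picks the coarse entries 7, 20, ..., 254 of the table. */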

static int read_colorspace_details(AVCodecContext *ctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = ctx->priv_data;
    int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->bpp) >> 3;
    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        s->ss_h = s->ss_v = 0;
        ctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (ctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   ctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (ctx->profile & 1) {
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
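
/* Example of the bit-depth mapping above: profiles 0/1 always take bits = 0
 * (8 bpp, bytesperpixel = 1); in profiles 2/3 a coded '0' selects bits = 1
 * (10 bpp) and a coded '1' selects bits = 2 (12 bpp), both of which give
 * bytesperpixel = (7 + bpp) >> 3 = 2. */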

static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    ctx->profile  = get_bits1(&s->gb);
    ctx->profile |= get_bits1(&s->gb) << 1;
    if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
    if (ctx->profile > 3) {
        av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
        return AVERROR_INVALIDDATA;
    }
    s->s.h.profile = ctx->profile;
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_keyframe = s->s.h.keyframe;
    s->s.h.keyframe  = !get_bits1(&s->gb);
    last_invisible   = s->s.h.invisible;
    s->s.h.invisible = !get_bits1(&s->gb);
    s->s.h.errorres  = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
    if (s->s.h.keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        if ((res = read_colorspace_details(ctx)) < 0)
            return res;
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            if (ctx->profile >= 1) {
                if ((res = read_colorspace_details(ctx)) < 0)
                    return res;
            } else {
                s->ss_h = s->ss_v = 1;
                s->bpp = 8;
                s->bpp_index = 0;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                ctx->colorspace = AVCOL_SPC_BT470BG;
                ctx->color_range = AVCOL_RANGE_JPEG;
            }
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0]   = get_bits(&s->gb, 3);
            s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1]   = get_bits(&s->gb, 3);
            s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2]   = get_bits(&s->gb, 3);
            s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                    get_bits(&s->gb, 2);
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref    = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref    = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                } else {
                    s->s.h.fixcompref    = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
                }
            }
        }
    }
    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0]  = 1;
        s->s.h.lf_delta.ref[1]  = 0;
        s->s.h.lf_delta.ref[2]  = -1;
        s->s.h.lf_delta.ref[3]  = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    }
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp)
        memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    }

    /* quantization header data */
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                         s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
    if (s->s.h.lossless)
        ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                              get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                                       get_bits(&s->gb, 8) : 255;
            }
        }

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
            else
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
        } else {
            qyac = s->s.h.yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];

        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
            else
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
        } else {
            lflvl = s->s.h.filter.level;
        }
        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            }
        } else {
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));
        }
    }

    /* tiling info */
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
               w, h, s->pix_fmt);
        return res;
    }
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
        else
            break;
    }
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows      = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != ctx->pix_fmt) {
                av_log(ctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(ctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
            } else {
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(ctx, AV_LOG_ERROR,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                           refw, refh, w, h);
                    return AVERROR_INVALIDDATA;
                }
                s->mvscale[i][0] = (refw << 14) / w;
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0]  = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1]  = 16 * s->mvscale[i][1] >> 14;
            }
        }
    }
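
    /* Worked example of the scale factors above: a 1280x720 frame predicting
     * from a 640x360 reference gets mvscale = (640 << 14) / 1280 = 8192,
     * i.e. 0.5 in 14-bit fixed point, in both directions, and
     * mvstep = 16 * 8192 >> 14 = 8 reference pixels per 16-pixel block. */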

    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        s->prob_ctx[c].p = vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef));
        memset(s->counts.eob,  0, sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // fw update)?
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
    } else {
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->s.h.txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->s.h.comppredmode = PRED_SINGLEREF;
        }

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}

static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
    int i;

#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }

#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            av_assert2(idx == 1); \
            av_assert2(mem != INVALID_MV); \
            if (mem_sub8x8 == INVALID_MV) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
                if (m != mem) { \
                    AV_WN32A(pmv, m); \
                    return; \
                } \
                mem_sub8x8 = AV_RN32A(&mv); \
            } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
                if (m != mem) { \
                    AV_WN32A(pmv, m); \
                } else { \
                    /* BUG I'm pretty sure this isn't the intention */ \
                    AV_WN32A(pmv, 0); \
                } \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tile_col_start) {
            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }

    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->s.h.use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];

        if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
            ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }

#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->s.h.use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];

        // no need to await_progress, because we already did that above
        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
    clamp_mv(pmv, pmv, s);
#undef INVALID_MV
#undef RETURN_MV
#undef RETURN_SCALE_MV
}
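
/* Example of the sign-bias flip in RETURN_SCALE_MV above: when a candidate
 * MV belongs to a reference whose sign bias differs from that of the target
 * reference, the vector is negated before being considered, so a candidate
 * of (-12, 4) is used as (12, -4). */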

static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 8 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
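
/* Worked example of the magnitude assembly above (a sketch): for class
 * c = 2 with integer bits 01 (n = 1), fractional code fp = 2 and a high-
 * precision bit of 1, the steps give n = (1 << 3) | (2 << 1) | 1 = 13,
 * plus the class offset 8 << 2 = 32, so n = 45; with a clear sign bit the
 * component decodes as +(45 + 1) = 46 in 1/8-pel units. */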

static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *b = s->b;

    if (mode == ZEROMV) {
        AV_ZERO64(mv);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->s.h.highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->s.h.highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}

static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}
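
/* Example use of setctx_2d(): decode_mode() below calls
 *   setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
 *             bw4, bh4, 8 * s->sb_cols, b->seg_id);
 * i.e. it splats one segment id over a bw4 x bh4 region at 8x8-block
 * granularity; the widths 1/2/4/8 handled above cover every legal bw4. */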

static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
    int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
    int have_a = row > 0, have_l = col > s->tile_col_start;
    int vref, filter_id;

    if (!s->s.h.segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->s.h.keyframe || s->s.h.intraonly) {
        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
                    vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->s.h.segmentation.prob);
    } else if (!s->s.h.segmentation.update_map ||
               (s->s.h.segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
                                                  s->left_segpred_ctx[row7]]))) {
        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
            int pred = 8, x;
            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;

            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, (row + h4) * 8, 0);
            for (y = 0; y < h4; y++) {
                int idx_base = (y + row) * 8 * s->sb_cols + col;
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[idx_base + x]);
            }
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->s.h.segmentation.prob);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if (s->s.h.segmentation.enabled &&
        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  bw4, bh4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->s.h.segmentation.enabled &&
              s->s.h.segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    if (s->s.h.keyframe || s->s.h.intraonly) {
        b->intra = 1;
    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
        int c;
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
    }

    if (s->s.h.keyframe || s->s.h.intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                 vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = a[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = a[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                         vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
                b->comp = s->s.h.comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->s.h.fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->s.h.fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->s.h.varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->s.h.varcompref[1] &&
                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->s.h.varcompref[1] &&
                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->s.h.varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
1886  }
1887  } else if (have_l) {
1888  if (s->left_intra_ctx[row7] ||
1889  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1890  c = 2;
1891  } else if (s->left_comp_ctx[row7]) {
1892  c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1893  } else {
1894  c = 4 * (s->left_ref_ctx[row7] == 1);
1895  }
1896  } else {
1897  c = 2;
1898  }
1899  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1900  s->counts.single_ref[c][1][bit]++;
1901  b->ref[0] = 1 + bit;
1902  }
1903  }
1904  }
1905 
1906  if (b->bs <= BS_8x8) {
1907  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
1908  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1909  } else {
1910  static const uint8_t off[10] = {
1911  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1912  };
1913 
1914  // FIXME this needs to use the LUT tables from find_ref_mvs
1915  // because not all are -1,0/0,-1
1916  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1917  [s->left_mode_ctx[row7 + off[b->bs]]];
1918 
1919  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1920  s->prob.p.mv_mode[c]);
1921  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1922  s->counts.mv_mode[c][b->mode[0] - 10]++;
1923  }
1924  }
1925 
1926  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
1927  int c;
1928 
1929  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1930  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1931  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1932  s->left_filter_ctx[row7] : 3;
1933  } else {
1934  c = s->above_filter_ctx[col];
1935  }
1936  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1937  c = s->left_filter_ctx[row7];
1938  } else {
1939  c = 3;
1940  }
1941 
1942  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1943  s->prob.p.filter[c]);
1944  s->counts.filter[c][filter_id]++;
1945  b->filter = vp9_filter_lut[filter_id];
1946  } else {
1947  b->filter = s->s.h.filtermode;
1948  }
1949 
1950  if (b->bs > BS_8x8) {
1951  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1952 
1953  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1954  s->prob.p.mv_mode[c]);
1955  s->counts.mv_mode[c][b->mode[0] - 10]++;
1956  fill_mv(s, b->mv[0], b->mode[0], 0);
1957 
1958  if (b->bs != BS_8x4) {
1959  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1960  s->prob.p.mv_mode[c]);
1961  s->counts.mv_mode[c][b->mode[1] - 10]++;
1962  fill_mv(s, b->mv[1], b->mode[1], 1);
1963  } else {
1964  b->mode[1] = b->mode[0];
1965  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1966  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1967  }
1968 
1969  if (b->bs != BS_4x8) {
1970  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1971  s->prob.p.mv_mode[c]);
1972  s->counts.mv_mode[c][b->mode[2] - 10]++;
1973  fill_mv(s, b->mv[2], b->mode[2], 2);
1974 
1975  if (b->bs != BS_8x4) {
1976  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1977  s->prob.p.mv_mode[c]);
1978  s->counts.mv_mode[c][b->mode[3] - 10]++;
1979  fill_mv(s, b->mv[3], b->mode[3], 3);
1980  } else {
1981  b->mode[3] = b->mode[2];
1982  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1983  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1984  }
1985  } else {
1986  b->mode[2] = b->mode[0];
1987  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1988  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1989  b->mode[3] = b->mode[1];
1990  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1991  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1992  }
1993  } else {
1994  fill_mv(s, b->mv[0], b->mode[0], -1);
1995  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1996  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1997  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1998  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1999  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2000  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2001  }
2002 
2003  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
2004  }
2005 
2006 #if HAVE_FAST_64BIT
2007 #define SPLAT_CTX(var, val, n) \
2008  switch (n) { \
2009  case 1: var = val; break; \
2010  case 2: AV_WN16A(&var, val * 0x0101); break; \
2011  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2012  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2013  case 16: { \
2014  uint64_t v64 = val * 0x0101010101010101ULL; \
2015  AV_WN64A( &var, v64); \
2016  AV_WN64A(&((uint8_t *) &var)[8], v64); \
2017  break; \
2018  } \
2019  }
2020 #else
2021 #define SPLAT_CTX(var, val, n) \
2022  switch (n) { \
2023  case 1: var = val; break; \
2024  case 2: AV_WN16A(&var, val * 0x0101); break; \
2025  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2026  case 8: { \
2027  uint32_t v32 = val * 0x01010101; \
2028  AV_WN32A( &var, v32); \
2029  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2030  break; \
2031  } \
2032  case 16: { \
2033  uint32_t v32 = val * 0x01010101; \
2034  AV_WN32A( &var, v32); \
2035  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2036  AV_WN32A(&((uint8_t *) &var)[8], v32); \
2037  AV_WN32A(&((uint8_t *) &var)[12], v32); \
2038  break; \
2039  } \
2040  }
2041 #endif
2042 
2043  switch (bwh_tab[1][b->bs][0]) {
2044 #define SET_CTXS(dir, off, n) \
2045  do { \
2046  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2047  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2048  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2049  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
2050  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2051  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2052  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2053  if (!b->intra) { \
2054  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2055  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
2056  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2057  } \
2058  } \
2059  } \
2060  } while (0)
2061  case 1: SET_CTXS(above, col, 1); break;
2062  case 2: SET_CTXS(above, col, 2); break;
2063  case 4: SET_CTXS(above, col, 4); break;
2064  case 8: SET_CTXS(above, col, 8); break;
2065  }
2066  switch (bwh_tab[1][b->bs][1]) {
2067  case 1: SET_CTXS(left, row7, 1); break;
2068  case 2: SET_CTXS(left, row7, 2); break;
2069  case 4: SET_CTXS(left, row7, 4); break;
2070  case 8: SET_CTXS(left, row7, 8); break;
2071  }
2072 #undef SPLAT_CTX
2073 #undef SET_CTXS
2074 
2075  if (!s->s.h.keyframe && !s->s.h.intraonly) {
2076  if (b->bs > BS_8x8) {
2077  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2078 
2079  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2080  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2081  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2082  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2083  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2084  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2085  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2086  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2087  } else {
2088  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2089 
2090  for (n = 0; n < w4 * 2; n++) {
2091  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2092  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2093  }
2094  for (n = 0; n < h4 * 2; n++) {
2095  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2096  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2097  }
2098  }
2099  }
2100 
2101  // FIXME kinda ugly
2102  for (y = 0; y < h4; y++) {
2103  int x, o = (row + y) * s->sb_cols * 8 + col;
2104  struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
2105 
2106  if (b->intra) {
2107  for (x = 0; x < w4; x++) {
2108  mv[x].ref[0] =
2109  mv[x].ref[1] = -1;
2110  }
2111  } else if (b->comp) {
2112  for (x = 0; x < w4; x++) {
2113  mv[x].ref[0] = b->ref[0];
2114  mv[x].ref[1] = b->ref[1];
2115  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2116  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2117  }
2118  } else {
2119  for (x = 0; x < w4; x++) {
2120  mv[x].ref[0] = b->ref[0];
2121  mv[x].ref[1] = -1;
2122  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2123  }
2124  }
2125  }
2126 }
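
// Illustrative sketch (not FFmpeg API): the SPLAT_CTX macros above replicate a
// single context byte across 1-16 entries by multiplying it into a word where
// every byte is 0x01. splat8() below is a hypothetical standalone equivalent
// of the 8-byte case, using plain memcpy instead of the AV_WN*A macros.
static inline void splat8(uint8_t *dst, uint8_t val)
{
    uint64_t v = val * 0x0101010101010101ULL; // 'val' copied into every byte
    memcpy(dst, &v, 8);                       // stores dst[0..7] = val
}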
2127 
2128 // FIXME merge cnt/eob arguments?
2129 static av_always_inline int
2130 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2131  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2132  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2133  int nnz, const int16_t *scan, const int16_t (*nb)[2],
2134  const int16_t *band_counts, const int16_t *qmul)
2135 {
2136  int i = 0, band = 0, band_left = band_counts[band];
2137  uint8_t *tp = p[0][nnz];
2138  uint8_t cache[1024];
2139 
2140  do {
2141  int val, rc;
2142 
2143  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2144  eob[band][nnz][val]++;
2145  if (!val)
2146  break;
2147 
2148  skip_eob:
2149  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2150  cnt[band][nnz][0]++;
2151  if (!--band_left)
2152  band_left = band_counts[++band];
2153  cache[scan[i]] = 0;
2154  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2155  tp = p[band][nnz];
2156  if (++i == n_coeffs)
2157  break; //invalid input; blocks should end with EOB
2158  goto skip_eob;
2159  }
2160 
2161  rc = scan[i];
2162  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2163  cnt[band][nnz][1]++;
2164  val = 1;
2165  cache[rc] = 1;
2166  } else {
2167  // fill in p[3-10] (model fill) - only once per frame for each pos
2168  if (!tp[3])
2169  memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2170 
2171  cnt[band][nnz][2]++;
2172  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2173  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2174  cache[rc] = val = 2;
2175  } else {
2176  val = 3 + vp56_rac_get_prob(c, tp[5]);
2177  cache[rc] = 3;
2178  }
2179  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2180  cache[rc] = 4;
2181  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2182  val = 5 + vp56_rac_get_prob(c, 159);
2183  } else {
2184  val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2185  val += vp56_rac_get_prob(c, 145);
2186  }
2187  } else { // cat 3-6
2188  cache[rc] = 5;
2189  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2190  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2191  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2192  val += (vp56_rac_get_prob(c, 148) << 1);
2193  val += vp56_rac_get_prob(c, 140);
2194  } else {
2195  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2196  val += (vp56_rac_get_prob(c, 155) << 2);
2197  val += (vp56_rac_get_prob(c, 140) << 1);
2198  val += vp56_rac_get_prob(c, 135);
2199  }
2200  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2201  val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2202  val += (vp56_rac_get_prob(c, 157) << 3);
2203  val += (vp56_rac_get_prob(c, 141) << 2);
2204  val += (vp56_rac_get_prob(c, 134) << 1);
2205  val += vp56_rac_get_prob(c, 130);
2206  } else {
2207  val = 67;
2208  if (!is8bitsperpixel) {
2209  if (bpp == 12) {
2210  val += vp56_rac_get_prob(c, 255) << 17;
2211  val += vp56_rac_get_prob(c, 255) << 16;
2212  }
2213  val += (vp56_rac_get_prob(c, 255) << 15);
2214  val += (vp56_rac_get_prob(c, 255) << 14);
2215  }
2216  val += (vp56_rac_get_prob(c, 254) << 13);
2217  val += (vp56_rac_get_prob(c, 254) << 12);
2218  val += (vp56_rac_get_prob(c, 254) << 11);
2219  val += (vp56_rac_get_prob(c, 252) << 10);
2220  val += (vp56_rac_get_prob(c, 249) << 9);
2221  val += (vp56_rac_get_prob(c, 243) << 8);
2222  val += (vp56_rac_get_prob(c, 230) << 7);
2223  val += (vp56_rac_get_prob(c, 196) << 6);
2224  val += (vp56_rac_get_prob(c, 177) << 5);
2225  val += (vp56_rac_get_prob(c, 153) << 4);
2226  val += (vp56_rac_get_prob(c, 140) << 3);
2227  val += (vp56_rac_get_prob(c, 133) << 2);
2228  val += (vp56_rac_get_prob(c, 130) << 1);
2229  val += vp56_rac_get_prob(c, 129);
2230  }
2231  }
2232  }
2233 #define STORE_COEF(c, i, v) do { \
2234  if (is8bitsperpixel) { \
2235  c[i] = v; \
2236  } else { \
2237  AV_WN32A(&c[i * 2], v); \
2238  } \
2239 } while (0)
2240  if (!--band_left)
2241  band_left = band_counts[++band];
2242  if (is_tx32x32)
2243  STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2244  else
2245  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2246  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2247  tp = p[band][nnz];
2248  } while (++i < n_coeffs);
2249 
2250  return i;
2251 }
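
// Illustrative sketch (hypothetical helper, not part of the decoder): the
// 'nnz' value threaded through the loop above is the rounded-up average of
// the two neighbours' token classes stored in cache[] (each 0..5), selecting
// one of six probability sets per band.
static inline int nnz_ctx(const uint8_t *cache, const int16_t (*nb)[2], int i)
{
    return (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; // 0..5
}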
2252 
2253 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2254  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2255  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2256  const int16_t (*nb)[2], const int16_t *band_counts,
2257  const int16_t *qmul)
2258 {
2259  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2260  nnz, scan, nb, band_counts, qmul);
2261 }
2262 
2263 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2264  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2265  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2266  const int16_t (*nb)[2], const int16_t *band_counts,
2267  const int16_t *qmul)
2268 {
2269  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2270  nnz, scan, nb, band_counts, qmul);
2271 }
2272 
2273 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2274  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2275  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2276  const int16_t (*nb)[2], const int16_t *band_counts,
2277  const int16_t *qmul)
2278 {
2279  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2280  nnz, scan, nb, band_counts, qmul);
2281 }
2282 
2283 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2284  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2285  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2286  const int16_t (*nb)[2], const int16_t *band_counts,
2287  const int16_t *qmul)
2288 {
2289  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2290  nnz, scan, nb, band_counts, qmul);
2291 }
2292 
2293 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2294 {
2295  VP9Context *s = ctx->priv_data;
2296  VP9Block *b = s->b;
2297  int row = s->row, col = s->col;
2298  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2299  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2300  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2301  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2302  int end_x = FFMIN(2 * (s->cols - col), w4);
2303  int end_y = FFMIN(2 * (s->rows - row), h4);
2304  int n, pl, x, y, res;
2305  int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
2306  int tx = 4 * s->s.h.lossless + b->tx;
2307  const int16_t * const *yscans = vp9_scans[tx];
2308  const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2309  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2310  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2311  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2312  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2313  static const int16_t band_counts[4][8] = {
2314  { 1, 2, 3, 4, 3, 16 - 13 },
2315  { 1, 2, 3, 4, 11, 64 - 21 },
2316  { 1, 2, 3, 4, 11, 256 - 21 },
2317  { 1, 2, 3, 4, 11, 1024 - 21 },
2318  };
2319  const int16_t *y_band_counts = band_counts[b->tx];
2320  const int16_t *uv_band_counts = band_counts[b->uvtx];
2321  int bytesperpixel = is8bitsperpixel ? 1 : 2;
2322  int total_coeff = 0;
2323 
2324 #define MERGE(la, end, step, rd) \
2325  for (n = 0; n < end; n += step) \
2326  la[n] = !!rd(&la[n])
2327 #define MERGE_CTX(step, rd) \
2328  do { \
2329  MERGE(l, end_y, step, rd); \
2330  MERGE(a, end_x, step, rd); \
2331  } while (0)
2332 
2333 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2334  for (n = 0, y = 0; y < end_y; y += step) { \
2335  for (x = 0; x < end_x; x += step, n += step * step) { \
2336  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2337  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2338  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2339  c, e, p, a[x] + l[y], yscans[txtp], \
2340  ynbs[txtp], y_band_counts, qmul[0]); \
2341  a[x] = l[y] = !!res; \
2342  total_coeff |= !!res; \
2343  if (step >= 4) { \
2344  AV_WN16A(&s->eob[n], res); \
2345  } else { \
2346  s->eob[n] = res; \
2347  } \
2348  } \
2349  }
2350 
2351 #define SPLAT(la, end, step, cond) \
2352  if (step == 2) { \
2353  for (n = 1; n < end; n += step) \
2354  la[n] = la[n - 1]; \
2355  } else if (step == 4) { \
2356  if (cond) { \
2357  for (n = 0; n < end; n += step) \
2358  AV_WN32A(&la[n], la[n] * 0x01010101); \
2359  } else { \
2360  for (n = 0; n < end; n += step) \
2361  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2362  } \
2363  } else /* step == 8 */ { \
2364  if (cond) { \
2365  if (HAVE_FAST_64BIT) { \
2366  for (n = 0; n < end; n += step) \
2367  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2368  } else { \
2369  for (n = 0; n < end; n += step) { \
2370  uint32_t v32 = la[n] * 0x01010101; \
2371  AV_WN32A(&la[n], v32); \
2372  AV_WN32A(&la[n + 4], v32); \
2373  } \
2374  } \
2375  } else { \
2376  for (n = 0; n < end; n += step) \
2377  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2378  } \
2379  }
2380 #define SPLAT_CTX(step) \
2381  do { \
2382  SPLAT(a, end_x, step, end_x == w4); \
2383  SPLAT(l, end_y, step, end_y == h4); \
2384  } while (0)
2385 
2386  /* y tokens */
2387  switch (b->tx) {
2388  case TX_4X4:
2389  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2390  break;
2391  case TX_8X8:
2392  MERGE_CTX(2, AV_RN16A);
2393  DECODE_Y_COEF_LOOP(2, 0,);
2394  SPLAT_CTX(2);
2395  break;
2396  case TX_16X16:
2397  MERGE_CTX(4, AV_RN32A);
2398  DECODE_Y_COEF_LOOP(4, 0,);
2399  SPLAT_CTX(4);
2400  break;
2401  case TX_32X32:
2402  MERGE_CTX(8, AV_RN64A);
2403  DECODE_Y_COEF_LOOP(8, 0, 32);
2404  SPLAT_CTX(8);
2405  break;
2406  }
2407 
2408 #define DECODE_UV_COEF_LOOP(step, v) \
2409  for (n = 0, y = 0; y < end_y; y += step) { \
2410  for (x = 0; x < end_x; x += step, n += step * step) { \
2411  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2412  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2413  16 * step * step, c, e, p, a[x] + l[y], \
2414  uvscan, uvnb, uv_band_counts, qmul[1]); \
2415  a[x] = l[y] = !!res; \
2416  total_coeff |= !!res; \
2417  if (step >= 4) { \
2418  AV_WN16A(&s->uveob[pl][n], res); \
2419  } else { \
2420  s->uveob[pl][n] = res; \
2421  } \
2422  } \
2423  }
2424 
2425  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2426  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2427  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2428  w4 >>= s->ss_h;
2429  end_x >>= s->ss_h;
2430  h4 >>= s->ss_v;
2431  end_y >>= s->ss_v;
2432  for (pl = 0; pl < 2; pl++) {
2433  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2434  l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2435  switch (b->uvtx) {
2436  case TX_4X4:
2437  DECODE_UV_COEF_LOOP(1,);
2438  break;
2439  case TX_8X8:
2440  MERGE_CTX(2, AV_RN16A);
2441  DECODE_UV_COEF_LOOP(2,);
2442  SPLAT_CTX(2);
2443  break;
2444  case TX_16X16:
2445  MERGE_CTX(4, AV_RN32A);
2446  DECODE_UV_COEF_LOOP(4,);
2447  SPLAT_CTX(4);
2448  break;
2449  case TX_32X32:
2450  MERGE_CTX(8, AV_RN64A);
2451  DECODE_UV_COEF_LOOP(8, 32);
2452  SPLAT_CTX(8);
2453  break;
2454  }
2455  }
2456 
2457  return total_coeff;
2458 }
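
// Illustrative sketch of the MERGE/SPLAT bookkeeping above for the 8x8 case
// (hypothetical helpers): before decoding, the two per-4x4 nnz flags covered
// by one 8x8 transform are collapsed into a single normalized flag; after
// decoding, the result is copied back so later 4x4-granular lookups stay
// consistent.
static inline void merge_nnz_8x8(uint8_t *la) { la[0] = la[0] || la[1]; }
static inline void splat_nnz_8x8(uint8_t *la) { la[1] = la[0]; }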
2459 
2460 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2461 {
2462  return decode_coeffs(ctx, 1);
2463 }
2464 
2465 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2466 {
2467  return decode_coeffs(ctx, 0);
2468 }
2469 
2470 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2471  uint8_t *dst_edge, ptrdiff_t stride_edge,
2472  uint8_t *dst_inner, ptrdiff_t stride_inner,
2473  uint8_t *l, int col, int x, int w,
2474  int row, int y, enum TxfmMode tx,
2475  int p, int ss_h, int ss_v, int bytesperpixel)
2476 {
2477  int have_top = row > 0 || y > 0;
2478  int have_left = col > s->tile_col_start || x > 0;
2479  int have_right = x < w - 1;
2480  int bpp = s->bpp;
2481  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2482  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2483  { DC_127_PRED, VERT_PRED } },
2484  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2485  { HOR_PRED, HOR_PRED } },
2486  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2487  { LEFT_DC_PRED, DC_PRED } },
2488  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2489  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2490  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2491  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2492  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2493  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2494  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2495  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2496  [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2497  { DC_127_PRED, VERT_LEFT_PRED } },
2498  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2499  { HOR_UP_PRED, HOR_UP_PRED } },
2500  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2501  { HOR_PRED, TM_VP8_PRED } },
2502  };
2503  static const struct {
2504  uint8_t needs_left:1;
2505  uint8_t needs_top:1;
2506  uint8_t needs_topleft:1;
2507  uint8_t needs_topright:1;
2508  uint8_t invert_left:1;
2509  } edges[N_INTRA_PRED_MODES] = {
2510  [VERT_PRED] = { .needs_top = 1 },
2511  [HOR_PRED] = { .needs_left = 1 },
2512  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2513  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2514  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2515  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2516  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2517  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2518  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2519  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2520  [LEFT_DC_PRED] = { .needs_left = 1 },
2521  [TOP_DC_PRED] = { .needs_top = 1 },
2522  [DC_128_PRED] = { 0 },
2523  [DC_127_PRED] = { 0 },
2524  [DC_129_PRED] = { 0 }
2525  };
2526 
2527  av_assert2(mode >= 0 && mode < 10);
2528  mode = mode_conv[mode][have_left][have_top];
2529  if (edges[mode].needs_top) {
2530  uint8_t *top, *topleft;
2531  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2532  int n_px_need_tr = 0;
2533 
2534  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2535  n_px_need_tr = 4;
2536 
2537  // if top of sb64-row, use s->intra_pred_data[] instead of
2538  // dst[-stride] for intra prediction (it contains pre- instead of
2539  // post-loopfilter data)
2540  if (have_top) {
2541  top = !(row & 7) && !y ?
2542  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2543  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2544  if (have_left)
2545  topleft = !(row & 7) && !y ?
2546  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2547  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2548  &dst_inner[-stride_inner];
2549  }
2550 
2551  if (have_top &&
2552  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2553  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2554  n_px_need + n_px_need_tr <= n_px_have) {
2555  *a = top;
2556  } else {
2557  if (have_top) {
2558  if (n_px_need <= n_px_have) {
2559  memcpy(*a, top, n_px_need * bytesperpixel);
2560  } else {
2561 #define memset_bpp(c, i1, v, i2, num) do { \
2562  if (bytesperpixel == 1) { \
2563  memset(&(c)[(i1)], (v)[(i2)], (num)); \
2564  } else { \
2565  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2566  for (n = 0; n < (num); n++) { \
2567  AV_WN16A(&(c)[((i1) + n) * 2], val); \
2568  } \
2569  } \
2570 } while (0)
2571  memcpy(*a, top, n_px_have * bytesperpixel);
2572  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2573  }
2574  } else {
2575 #define memset_val(c, val, num) do { \
2576  if (bytesperpixel == 1) { \
2577  memset((c), (val), (num)); \
2578  } else { \
2579  int n; \
2580  for (n = 0; n < (num); n++) { \
2581  AV_WN16A(&(c)[n * 2], (val)); \
2582  } \
2583  } \
2584 } while (0)
2585  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2586  }
2587  if (edges[mode].needs_topleft) {
2588  if (have_left && have_top) {
2589 #define assign_bpp(c, i1, v, i2) do { \
2590  if (bytesperpixel == 1) { \
2591  (c)[(i1)] = (v)[(i2)]; \
2592  } else { \
2593  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2594  } \
2595 } while (0)
2596  assign_bpp(*a, -1, topleft, -1);
2597  } else {
2598 #define assign_val(c, i, v) do { \
2599  if (bytesperpixel == 1) { \
2600  (c)[(i)] = (v); \
2601  } else { \
2602  AV_WN16A(&(c)[(i) * 2], (v)); \
2603  } \
2604 } while (0)
2605  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2606  }
2607  }
2608  if (tx == TX_4X4 && edges[mode].needs_topright) {
2609  if (have_top && have_right &&
2610  n_px_need + n_px_need_tr <= n_px_have) {
2611  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2612  } else {
2613  memset_bpp(*a, 4, *a, 3, 4);
2614  }
2615  }
2616  }
2617  }
2618  if (edges[mode].needs_left) {
2619  if (have_left) {
2620  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2621  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2622  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2623 
2624  if (edges[mode].invert_left) {
2625  if (n_px_need <= n_px_have) {
2626  for (i = 0; i < n_px_need; i++)
2627  assign_bpp(l, i, &dst[i * stride], -1);
2628  } else {
2629  for (i = 0; i < n_px_have; i++)
2630  assign_bpp(l, i, &dst[i * stride], -1);
2631  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2632  }
2633  } else {
2634  if (n_px_need <= n_px_have) {
2635  for (i = 0; i < n_px_need; i++)
2636  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2637  } else {
2638  for (i = 0; i < n_px_have; i++)
2639  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2640  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2641  }
2642  }
2643  } else {
2644  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2645  }
2646  }
2647 
2648  return mode;
2649 }
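
// Illustrative example of the mode_conv[] remapping at the top of
// check_intra_mode(): a directional mode with no usable edge degrades to a
// constant-DC variant, e.g. VERT_PRED without a top edge becomes DC_127_PRED
// (all prediction pixels 127 at 8 bpp). A hypothetical single-mode helper:
static inline int vert_pred_fallback(int have_top)
{
    return have_top ? VERT_PRED : DC_127_PRED; // mirrors mode_conv[VERT_PRED]
}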
2650 
2651 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2652  ptrdiff_t uv_off, int bytesperpixel)
2653 {
2654  VP9Context *s = ctx->priv_data;
2655  VP9Block *b = s->b;
2656  int row = s->row, col = s->col;
2657  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2658  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2659  int end_x = FFMIN(2 * (s->cols - col), w4);
2660  int end_y = FFMIN(2 * (s->rows - row), h4);
2661  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
2662  int uvstep1d = 1 << b->uvtx, p;
2663  uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
2664  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2665  LOCAL_ALIGNED_32(uint8_t, l, [64]);
2666 
2667  for (n = 0, y = 0; y < end_y; y += step1d) {
2668  uint8_t *ptr = dst, *ptr_r = dst_r;
2669  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2670  ptr_r += 4 * step1d * bytesperpixel, n += step) {
2671  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2672  y * 2 + x : 0];
2673  uint8_t *a = &a_buf[32];
2674  enum TxfmType txtp = vp9_intra_txfm_type[mode];
2675  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2676 
2677  mode = check_intra_mode(s, mode, &a, ptr_r,
2678  s->s.frames[CUR_FRAME].tf.f->linesize[0],
2679  ptr, s->y_stride, l,
2680  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2681  s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2682  if (eob)
2683  s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2684  s->block + 16 * n * bytesperpixel, eob);
2685  }
2686  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
2687  dst += 4 * step1d * s->y_stride;
2688  }
2689 
2690  // U/V
2691  w4 >>= s->ss_h;
2692  end_x >>= s->ss_h;
2693  end_y >>= s->ss_v;
2694  step = 1 << (b->uvtx * 2);
2695  for (p = 0; p < 2; p++) {
2696  dst = s->dst[1 + p];
2697  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2698  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2699  uint8_t *ptr = dst, *ptr_r = dst_r;
2700  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2701  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2702  int mode = b->uvmode;
2703  uint8_t *a = &a_buf[32];
2704  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2705 
2706  mode = check_intra_mode(s, mode, &a, ptr_r,
2707  s->s.frames[CUR_FRAME].tf.f->linesize[1],
2708  ptr, s->uv_stride, l, col, x, w4, row, y,
2709  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2710  s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2711  if (eob)
2712  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2713  s->uvblock[p] + 16 * n * bytesperpixel, eob);
2714  }
2715  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
2716  dst += 4 * uvstep1d * s->uv_stride;
2717  }
2718  }
2719 }
2720 
2721 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2722 {
2723  intra_recon(ctx, y_off, uv_off, 1);
2724 }
2725 
2726 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2727 {
2728  intra_recon(ctx, y_off, uv_off, 2);
2729 }
2730 
2731 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2732  uint8_t *dst, ptrdiff_t dst_stride,
2733  const uint8_t *ref, ptrdiff_t ref_stride,
2734  ThreadFrame *ref_frame,
2735  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2736  int bw, int bh, int w, int h, int bytesperpixel)
2737 {
2738  int mx = mv->x, my = mv->y, th;
2739 
2740  y += my >> 3;
2741  x += mx >> 3;
2742  ref += y * ref_stride + x * bytesperpixel;
2743  mx &= 7;
2744  my &= 7;
2745  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2746  // we use +7 because the last 7 pixels of each sbrow can be changed in
2747  // the longest loopfilter of the next sbrow
2748  th = (y + bh + 4 * !!my + 7) >> 6;
2749  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2750  if (x < !!mx * 3 || y < !!my * 3 ||
2751  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2752  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2753  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2754  160, ref_stride,
2755  bw + !!mx * 7, bh + !!my * 7,
2756  x - !!mx * 3, y - !!my * 3, w, h);
2757  ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2758  ref_stride = 160;
2759  }
2760  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2761 }
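
// Illustrative sketch of the subpel split used above (hypothetical helper):
// luma motion vectors are in 1/8-pel units, so the integer pixel offset is
// mv >> 3 (arithmetic shift, i.e. floor division, also for negative MVs) and
// the remaining 8-tap filter phase is mv & 7.
static inline void split_luma_mv(int mv, int *ipel, int *phase)
{
    *ipel  = mv >> 3; // integer pixel part
    *phase = mv & 7;  // 1/8-pel fraction, 0..7
}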
2762 
2763 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2764  uint8_t *dst_u, uint8_t *dst_v,
2765  ptrdiff_t dst_stride,
2766  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2767  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2768  ThreadFrame *ref_frame,
2769  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2770  int bw, int bh, int w, int h, int bytesperpixel)
2771 {
2772  int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2773 
2774  y += my >> 4;
2775  x += mx >> 4;
2776  ref_u += y * src_stride_u + x * bytesperpixel;
2777  ref_v += y * src_stride_v + x * bytesperpixel;
2778  mx &= 15;
2779  my &= 15;
2780  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2781  // we use +7 because the last 7 pixels of each sbrow can be changed in
2782  // the longest loopfilter of the next sbrow
2783  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2784  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2785  if (x < !!mx * 3 || y < !!my * 3 ||
2786  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2787  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2788  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2789  160, src_stride_u,
2790  bw + !!mx * 7, bh + !!my * 7,
2791  x - !!mx * 3, y - !!my * 3, w, h);
2792  ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2793  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2794 
2795  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2796  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2797  160, src_stride_v,
2798  bw + !!mx * 7, bh + !!my * 7,
2799  x - !!mx * 3, y - !!my * 3, w, h);
2800  ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2801  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2802  } else {
2803  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2804  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2805  }
2806 }
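
// Illustrative sketch (hypothetical helper): for chroma, the 1/8-pel luma MV
// is shifted left by !ss_h / !ss_v as above, yielding 1/16-pel units in the
// (possibly subsampled) chroma plane; the integer part is then mv >> 4 and
// the filter phase mv & 15.
static inline void split_chroma_mv(int luma_mv, int ss, int *ipel, int *phase)
{
    int mv = luma_mv << !ss; // 1/8-pel luma -> 1/16-pel chroma units
    *ipel  = mv >> 4;
    *phase = mv & 15;
}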
2807 
2808 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2809  px, py, pw, ph, bw, bh, w, h, i) \
2810  mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2811  mv, bw, bh, w, h, bytesperpixel)
2812 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2813  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2814  mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2815  row, col, mv, bw, bh, w, h, bytesperpixel)
2816 #define SCALED 0
2817 #define FN(x) x##_8bpp
2818 #define BYTES_PER_PIXEL 1
2819 #include "vp9_mc_template.c"
2820 #undef FN
2821 #undef BYTES_PER_PIXEL
2822 #define FN(x) x##_16bpp
2823 #define BYTES_PER_PIXEL 2
2824 #include "vp9_mc_template.c"
2825 #undef mc_luma_dir
2826 #undef mc_chroma_dir
2827 #undef FN
2828 #undef BYTES_PER_PIXEL
2829 #undef SCALED
2830 
2831 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2832  vp9_mc_func (*mc)[2],
2833  uint8_t *dst, ptrdiff_t dst_stride,
2834  const uint8_t *ref, ptrdiff_t ref_stride,
2835  ThreadFrame *ref_frame,
2836  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2837  int px, int py, int pw, int ph,
2838  int bw, int bh, int w, int h, int bytesperpixel,
2839  const uint16_t *scale, const uint8_t *step)
2840 {
2841  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2842  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2843  mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
2844  y, x, in_mv, bw, bh, w, h, bytesperpixel);
2845  } else {
2846 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2847  int mx, my;
2848  int refbw_m1, refbh_m1;
2849  int th;
2850  VP56mv mv;
2851 
2852  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2853  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2854  // BUG libvpx seems to scale the two components separately. This introduces
2855  // rounding errors but we have to reproduce them to be exactly compatible
2856  // with the output from libvpx...
2857  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2858  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2859 
2860  y = my >> 4;
2861  x = mx >> 4;
2862  ref += y * ref_stride + x * bytesperpixel;
2863  mx &= 15;
2864  my &= 15;
2865  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2866  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2867  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2868  // we use +7 because the last 7 pixels of each sbrow can be changed in
2869  // the longest loopfilter of the next sbrow
2870  th = (y + refbh_m1 + 4 + 7) >> 6;
2871  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2872  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2873  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2874  ref - 3 * ref_stride - 3 * bytesperpixel,
2875  288, ref_stride,
2876  refbw_m1 + 8, refbh_m1 + 8,
2877  x - 3, y - 3, w, h);
2878  ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2879  ref_stride = 288;
2880  }
2881  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2882  }
2883 }
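
// Worked example of the scale_mv() arithmetic above, assuming scale[dim]
// holds the 14-bit fixed-point ratio (ref_dim << 14) / cur_dim set up during
// frame-size handling: mapping x = 100 from a 1920-px-wide frame onto a
// 960-px-wide reference gives scale = (960 << 14) / 1920 = 8192, so
// scale_mv(100 * 16, 0) = (1600 * 8192) >> 14 = 800, i.e. pixel 50 in
// 1/16-pel units.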
2884 
2885 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2886  vp9_mc_func (*mc)[2],
2887  uint8_t *dst_u, uint8_t *dst_v,
2888  ptrdiff_t dst_stride,
2889  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2890  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2891  ThreadFrame *ref_frame,
2892  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2893  int px, int py, int pw, int ph,
2894  int bw, int bh, int w, int h, int bytesperpixel,
2895  const uint16_t *scale, const uint8_t *step)
2896 {
2897  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2898  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2899  mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
2900  ref_v, src_stride_v, ref_frame,
2901  y, x, in_mv, bw, bh, w, h, bytesperpixel);
2902  } else {
2903  int mx, my;
2904  int refbw_m1, refbh_m1;
2905  int th;
2906  VP56mv mv;
2907 
2908  if (s->ss_h) {
2909  // BUG https://code.google.com/p/webm/issues/detail?id=820
2910  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2911  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2912  } else {
2913  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2914  mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2915  }
2916  if (s->ss_v) {
2917  // BUG https://code.google.com/p/webm/issues/detail?id=820
2918  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2919  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2920  } else {
2921  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2922  my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2923  }
2924 #undef scale_mv
2925  y = my >> 4;
2926  x = mx >> 4;
2927  ref_u += y * src_stride_u + x * bytesperpixel;
2928  ref_v += y * src_stride_v + x * bytesperpixel;
2929  mx &= 15;
2930  my &= 15;
2931  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2932  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2933  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2934  // we use +7 because the last 7 pixels of each sbrow can be changed in
2935  // the longest loopfilter of the next sbrow
2936  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2937  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2938  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2939  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2940  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2941  288, src_stride_u,
2942  refbw_m1 + 8, refbh_m1 + 8,
2943  x - 3, y - 3, w, h);
2944  ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2945  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2946 
2947  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2948  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2949  288, src_stride_v,
2950  refbw_m1 + 8, refbh_m1 + 8,
2951  x - 3, y - 3, w, h);
2952  ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2953  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2954  } else {
2955  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2956  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2957  }
2958  }
2959 }
2960 
2961 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2962  px, py, pw, ph, bw, bh, w, h, i) \
2963  mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2964  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2965  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2966 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2967  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2968  mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2969  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2970  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2971 #define SCALED 1
2972 #define FN(x) x##_scaled_8bpp
2973 #define BYTES_PER_PIXEL 1
2974 #include "vp9_mc_template.c"
2975 #undef FN
2976 #undef BYTES_PER_PIXEL
2977 #define FN(x) x##_scaled_16bpp
2978 #define BYTES_PER_PIXEL 2
2979 #include "vp9_mc_template.c"
2980 #undef mc_luma_dir
2981 #undef mc_chroma_dir
2982 #undef FN
2983 #undef BYTES_PER_PIXEL
2984 #undef SCALED
2985 
2986 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
2987 {
2988  VP9Context *s = ctx->priv_data;
2989  VP9Block *b = s->b;
2990  int row = s->row, col = s->col;
2991 
2992  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2993  if (bytesperpixel == 1) {
2994  inter_pred_scaled_8bpp(ctx);
2995  } else {
2996  inter_pred_scaled_16bpp(ctx);
2997  }
2998  } else {
2999  if (bytesperpixel == 1) {
3000  inter_pred_8bpp(ctx);
3001  } else {
3002  inter_pred_16bpp(ctx);
3003  }
3004  }
3005  if (!b->skip) {
3006  /* mostly copied intra_recon() */
3007 
3008  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3009  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3010  int end_x = FFMIN(2 * (s->cols - col), w4);
3011  int end_y = FFMIN(2 * (s->rows - row), h4);
3012  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
3013  int uvstep1d = 1 << b->uvtx, p;
3014  uint8_t *dst = s->dst[0];
3015 
3016  // y itxfm add
3017  for (n = 0, y = 0; y < end_y; y += step1d) {
3018  uint8_t *ptr = dst;
3019  for (x = 0; x < end_x; x += step1d,
3020  ptr += 4 * step1d * bytesperpixel, n += step) {
3021  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3022 
3023  if (eob)
3024  s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3025  s->block + 16 * n * bytesperpixel, eob);
3026  }
3027  dst += 4 * s->y_stride * step1d;
3028  }
3029 
3030  // uv itxfm add
3031  end_x >>= s->ss_h;
3032  end_y >>= s->ss_v;
3033  step = 1 << (b->uvtx * 2);
3034  for (p = 0; p < 2; p++) {
3035  dst = s->dst[p + 1];
3036  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3037  uint8_t *ptr = dst;
3038  for (x = 0; x < end_x; x += uvstep1d,
3039  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3040  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3041 
3042  if (eob)
3043  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3044  s->uvblock[p] + 16 * n * bytesperpixel, eob);
3045  }
3046  dst += 4 * uvstep1d * s->uv_stride;
3047  }
3048  }
3049  }
3050 }
3051 
3052 static void inter_recon_8bpp(AVCodecContext *ctx)
3053 {
3054  inter_recon(ctx, 1);
3055 }
3056 
3057 static void inter_recon_16bpp(AVCodecContext *ctx)
3058 {
3059  inter_recon(ctx, 2);
3060 }
3061 
3062 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3063  int row_and_7, int col_and_7,
3064  int w, int h, int col_end, int row_end,
3065  enum TxfmMode tx, int skip_inter)
3066 {
3067  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3068  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3069 
3070  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3071  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3072  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3073  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3074 
3075  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3076  // edges. This means that for UV, we work on two subsampled blocks at
3077  // a time, and we only use the topleft block's mode information to set
3078  // things like block strength. Thus, for any block size smaller than
3079  // 16x16, ignore the odd portion of the block.
3080  if (tx == TX_4X4 && (ss_v | ss_h)) {
3081  if (h == ss_v) {
3082  if (row_and_7 & 1)
3083  return;
3084  if (!row_end)
3085  h += 1;
3086  }
3087  if (w == ss_h) {
3088  if (col_and_7 & 1)
3089  return;
3090  if (!col_end)
3091  w += 1;
3092  }
3093  }
3094 
3095  if (tx == TX_4X4 && !skip_inter) {
3096  int t = 1 << col_and_7, m_col = (t << w) - t, y;
3097  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3098  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3099 
3100  for (y = row_and_7; y < h + row_and_7; y++) {
3101  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3102 
3103  mask[0][y][1] |= m_row_8;
3104  mask[0][y][2] |= m_row_4;
3105  // for odd lines, if the odd col is not being filtered,
3106  // skip odd row also:
3107  // .---. <-- a
3108  // | |
3109  // |___| <-- b
3110  // ^ ^
3111  // c d
3112  //
3113  // if a/c are even row/col and b/d are odd, and d is skipped,
3114  // e.g. right edge of size-66x66.webm, then skip b also (bug)
3115  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3116  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3117  } else {
3118  mask[1][y][col_mask_id] |= m_col;
3119  }
3120  if (!ss_h)
3121  mask[0][y][3] |= m_col;
3122  if (!ss_v) {
3123  if (ss_h && (col_end & 1))
3124  mask[1][y][3] |= (t << (w - 1)) - t;
3125  else
3126  mask[1][y][3] |= m_col;
3127  }
3128  }
3129  } else {
3130  int y, t = 1 << col_and_7, m_col = (t << w) - t;
3131 
3132  if (!skip_inter) {
3133  int mask_id = (tx == TX_8X8);
3134  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3135  int l2 = tx + ss_h - 1, step1d;
3136  int m_row = m_col & masks[l2];
3137 
3138  // at odd UV col/row edges of tx16/tx32 blocks, force the 8-px-wide
3139  // loopfilter to prevent filtering past the visible edge
3140  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3141  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3142  int m_row_8 = m_row - m_row_16;
3143 
3144  for (y = row_and_7; y < h + row_and_7; y++) {
3145  mask[0][y][0] |= m_row_16;
3146  mask[0][y][1] |= m_row_8;
3147  }
3148  } else {
3149  for (y = row_and_7; y < h + row_and_7; y++)
3150  mask[0][y][mask_id] |= m_row;
3151  }
3152 
3153  l2 = tx + ss_v - 1;
3154  step1d = 1 << l2;
3155  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3156  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3157  mask[1][y][0] |= m_col;
3158  if (y - row_and_7 == h - 1)
3159  mask[1][y][1] |= m_col;
3160  } else {
3161  for (y = row_and_7; y < h + row_and_7; y += step1d)
3162  mask[1][y][mask_id] |= m_col;
3163  }
3164  } else if (tx != TX_4X4) {
3165  int mask_id;
3166 
3167  mask_id = (tx == TX_8X8) || (h == ss_v);
3168  mask[1][row_and_7][mask_id] |= m_col;
3169  mask_id = (tx == TX_8X8) || (w == ss_h);
3170  for (y = row_and_7; y < h + row_and_7; y++)
3171  mask[0][y][mask_id] |= t;
3172  } else {
3173  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3174 
3175  for (y = row_and_7; y < h + row_and_7; y++) {
3176  mask[0][y][2] |= t4;
3177  mask[0][y][1] |= t8;
3178  }
3179  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3180  }
3181  }
3182 }
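
// Illustrative sketch of the column-mask arithmetic in mask_edges()
// (hypothetical helper): with t = 1 << col_and_7, the expression
// (t << w) - t sets exactly w consecutive bits starting at bit col_and_7.
static inline unsigned col_mask(int col_and_7, int w)
{
    unsigned t = 1u << col_and_7;
    return (t << w) - t; // e.g. col_and_7 = 2, w = 3 -> 0b0011100
}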
3183 
3184 static void decode_b(AVCodecContext *ctx, int row, int col,
3185  struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3186  enum BlockLevel bl, enum BlockPartition bp)
3187 {
3188  VP9Context *s = ctx->priv_data;
3189  VP9Block *b = s->b;
3190  enum BlockSize bs = bl * 3 + bp;
3191  int bytesperpixel = s->bytesperpixel;
3192  int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3193  int emu[2];
3194  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3195 
3196  s->row = row;
3197  s->row7 = row & 7;
3198  s->col = col;
3199  s->col7 = col & 7;
3200  s->min_mv.x = -(128 + col * 64);
3201  s->min_mv.y = -(128 + row * 64);
3202  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3203  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3204  if (s->pass < 2) {
3205  b->bs = bs;
3206  b->bl = bl;
3207  b->bp = bp;
3208  decode_mode(ctx);
3209  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3210  (s->ss_v && h4 * 2 == (1 << b->tx)));
3211 
3212  if (!b->skip) {
3213  int has_coeffs;
3214 
3215  if (bytesperpixel == 1) {
3216  has_coeffs = decode_coeffs_8bpp(ctx);
3217  } else {
3218  has_coeffs = decode_coeffs_16bpp(ctx);
3219  }
3220  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3221  b->skip = 1;
3222  memset(&s->above_skip_ctx[col], 1, w4);
3223  memset(&s->left_skip_ctx[s->row7], 1, h4);
3224  }
3225  } else {
3226  int row7 = s->row7;
3227 
3228 #define SPLAT_ZERO_CTX(v, n) \
3229  switch (n) { \
3230  case 1: v = 0; break; \
3231  case 2: AV_ZERO16(&v); break; \
3232  case 4: AV_ZERO32(&v); break; \
3233  case 8: AV_ZERO64(&v); break; \
3234  case 16: AV_ZERO128(&v); break; \
3235  }
3236 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3237  do { \
3238  SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3239  if (s->ss_##dir2) { \
3240  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3241  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3242  } else { \
3243  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3244  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3245  } \
3246  } while (0)
3247 
3248  switch (w4) {
3249  case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3250  case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3251  case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3252  case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3253  }
3254  switch (h4) {
3255  case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3256  case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3257  case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3258  case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3259  }
3260  }
3261 
3262  if (s->pass == 1) {
3263  s->b++;
3264  s->block += w4 * h4 * 64 * bytesperpixel;
3265  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3266  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3267  s->eob += 4 * w4 * h4;
3268  s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3269  s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3270 
3271  return;
3272  }
3273  }
3274 
3275  // emulated overhangs if the stride of the target buffer can't hold the
3276  // data. This makes it possible to support emu-edge and so on even if we
3277  // have large block overhangs
3278  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
3279  (row + h4) > s->rows;
3280  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
3281  (row + h4) > s->rows;
3282  if (emu[0]) {
3283  s->dst[0] = s->tmp_y;
3284  s->y_stride = 128;
3285  } else {
3286  s->dst[0] = f->data[0] + yoff;
3287  s->y_stride = f->linesize[0];
3288  }
3289  if (emu[1]) {
3290  s->dst[1] = s->tmp_uv[0];
3291  s->dst[2] = s->tmp_uv[1];
3292  s->uv_stride = 128;
3293  } else {
3294  s->dst[1] = f->data[1] + uvoff;
3295  s->dst[2] = f->data[2] + uvoff;
3296  s->uv_stride = f->linesize[1];
3297  }
3298  if (b->intra) {
3299  if (s->bpp > 8) {
3300  intra_recon_16bpp(ctx, yoff, uvoff);
3301  } else {
3302  intra_recon_8bpp(ctx, yoff, uvoff);
3303  }
3304  } else {
3305  if (s->bpp > 8) {
3306  inter_recon_16bpp(ctx);
3307  } else {
3308  inter_recon_8bpp(ctx);
3309  }
3310  }
3311  if (emu[0]) {
3312  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3313 
3314  for (n = 0; o < w; n++) {
3315  int bw = 64 >> n;
3316 
3317  av_assert2(n <= 4);
3318  if (w & bw) {
3319  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
3320  s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
3321  o += bw;
3322  }
3323  }
3324  }
3325  if (emu[1]) {
3326  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3327  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3328 
3329  for (n = s->ss_h; o < w; n++) {
3330  int bw = 64 >> n;
3331 
3332  av_assert2(n <= 4);
3333  if (w & bw) {
3334  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
3335  s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
3336  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
3337  s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
3338  o += bw;
3339  }
3340  }
3341  }
3342 
3343  // pick filter level and find edges to apply filter to
3344  if (s->s.h.filter.level &&
3345  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3346  [b->mode[3] != ZEROMV]) > 0) {
3347  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3348  int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3349 
3350  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3351  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3352  if (s->ss_h || s->ss_v)
3353  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3354  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3355  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3356  b->uvtx, skip_inter);
3357 
3358  if (!s->filter_lut.lim_lut[lvl]) {
3359  int sharp = s->s.h.filter.sharpness;
3360  int limit = lvl;
3361 
3362  if (sharp > 0) {
3363  limit >>= (sharp + 3) >> 2;
3364  limit = FFMIN(limit, 9 - sharp);
3365  }
3366  limit = FFMAX(limit, 1);
3367 
3368  s->filter_lut.lim_lut[lvl] = limit;
3369  s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3370  }
3371  }
3372 
3373  if (s->pass == 2) {
3374  s->b++;
3375  s->block += w4 * h4 * 64 * bytesperpixel;
3376  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3377  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3378  s->eob += 4 * w4 * h4;
3379  s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3380  s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3381  }
3382 }
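
// Worked example of the lim_lut/mblim_lut fill in decode_b() above: for
// lvl = 32 and sharpness = 4, limit = FFMIN(32 >> ((4 + 3) >> 2), 9 - 4) = 5,
// which already satisfies limit >= 1, and mblim = 2 * (32 + 2) + 5 = 73.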
3383 
3384 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3385  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3386 {
3387  VP9Context *s = ctx->priv_data;
3388  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3389  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3390  const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? vp9_default_kf_partition_probs[bl][c] :
3391  s->prob.p.partition[bl][c];
3392  enum BlockPartition bp;
3393  ptrdiff_t hbs = 4 >> bl;
3394  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3395  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3396  int bytesperpixel = s->bytesperpixel;
3397 
3398  if (bl == BL_8X8) {
3399  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3400  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3401  } else if (col + hbs < s->cols) { // FIXME why not <=?
3402  if (row + hbs < s->rows) { // FIXME why not <=?
3403  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3404  switch (bp) {
3405  case PARTITION_NONE:
3406  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3407  break;
3408  case PARTITION_H:
3409  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3410  yoff += hbs * 8 * y_stride;
3411  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3412  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3413  break;
3414  case PARTITION_V:
3415  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3416  yoff += hbs * 8 * bytesperpixel;
3417  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3418  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3419  break;
3420  case PARTITION_SPLIT:
3421  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3422  decode_sb(ctx, row, col + hbs, lflvl,
3423  yoff + 8 * hbs * bytesperpixel,
3424  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3425  yoff += hbs * 8 * y_stride;
3426  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3427  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3428  decode_sb(ctx, row + hbs, col + hbs, lflvl,
3429  yoff + 8 * hbs * bytesperpixel,
3430  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3431  break;
3432  default:
3433  av_assert0(0);
3434  }
3435  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3436  bp = PARTITION_SPLIT;
3437  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3438  decode_sb(ctx, row, col + hbs, lflvl,
3439  yoff + 8 * hbs * bytesperpixel,
3440  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3441  } else {
3442  bp = PARTITION_H;
3443  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3444  }
3445  } else if (row + hbs < s->rows) { // FIXME why not <=?
3446  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3447  bp = PARTITION_SPLIT;
3448  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3449  yoff += hbs * 8 * y_stride;
3450  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3451  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3452  } else {
3453  bp = PARTITION_V;
3454  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3455  }
3456  } else {
3457  bp = PARTITION_SPLIT;
3458  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3459  }
3460  s->counts.partition[bl][c][bp]++;
3461 }
3462 
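 /* Editorial note (not in the upstream source): decode_sb_mem() is the
  * reconstruction half of the 2-pass (frame-threaded) mode. It replays
  * the block layout that pass 1 stored in s->b (b->bl / b->bp) instead
  * of reading partition bits again, recursing only until the stored
  * block level matches the current one. */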
3463 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3464  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3465 {
3466  VP9Context *s = ctx->priv_data;
3467  VP9Block *b = s->b;
3468  ptrdiff_t hbs = 4 >> bl;
3469  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3470  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3471  int bytesperpixel = s->bytesperpixel;
3472 
3473  if (bl == BL_8X8) {
3474  av_assert2(b->bl == BL_8X8);
3475  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3476  } else if (s->b->bl == bl) {
3477  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3478  if (b->bp == PARTITION_H && row + hbs < s->rows) {
3479  yoff += hbs * 8 * y_stride;
3480  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3481  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3482  } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3483  yoff += hbs * 8 * bytesperpixel;
3484  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3485  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3486  }
3487  } else {
3488  decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3489  if (col + hbs < s->cols) { // FIXME why not <=?
3490  if (row + hbs < s->rows) {
3491  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3492  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3493  yoff += hbs * 8 * y_stride;
3494  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3495  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3496  decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3497  yoff + 8 * hbs * bytesperpixel,
3498  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3499  } else {
3500  yoff += hbs * 8 * bytesperpixel;
3501  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3502  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3503  }
3504  } else if (row + hbs < s->rows) {
3505  yoff += hbs * 8 * y_stride;
3506  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3507  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3508  }
3509  }
3510 }
3511 
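 /* Editorial note (not in the upstream source): in lflvl->mask each set
  * bit selects one 8x8 column position, and the last index picks the
  * edge size (0=16px, 1=8px, 2=4px, 3=inner 4px, as declared in struct
  * VP9Filter). The per-size masks are ORed into hm1/hm2/hm so the x
  * loop can stop as soon as no filterable edge bit remains. */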
3512 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3513  uint8_t *lvl, uint8_t (*mask)[4],
3514  uint8_t *dst, ptrdiff_t ls)
3515 {
3516  int y, x, bytesperpixel = s->bytesperpixel;
3517 
3518  // filter edges between columns (e.g. block1 | block2)
3519  for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3520  uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3521  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3522  unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3523  unsigned hm = hm1 | hm2 | hm13 | hm23;
3524 
3525  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3526  if (col || x > 1) {
3527  if (hm1 & x) {
3528  int L = *l, H = L >> 4;
3529  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3530 
3531  if (hmask1[0] & x) {
3532  if (hmask2[0] & x) {
3533  av_assert2(l[8 << ss_v] == L);
3534  s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3535  } else {
3536  s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3537  }
3538  } else if (hm2 & x) {
3539  L = l[8 << ss_v];
3540  H |= (L >> 4) << 8;
3541  E |= s->filter_lut.mblim_lut[L] << 8;
3542  I |= s->filter_lut.lim_lut[L] << 8;
3543  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3544  [!!(hmask2[1] & x)]
3545  [0](ptr, ls, E, I, H);
3546  } else {
3547  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3548  [0](ptr, ls, E, I, H);
3549  }
3550  } else if (hm2 & x) {
3551  int L = l[8 << ss_v], H = L >> 4;
3552  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3553 
3554  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3555  [0](ptr + 8 * ls, ls, E, I, H);
3556  }
3557  }
3558  if (ss_h) {
3559  if (x & 0xAA)
3560  l += 2;
3561  } else {
3562  if (hm13 & x) {
3563  int L = *l, H = L >> 4;
3564  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3565 
3566  if (hm23 & x) {
3567  L = l[8 << ss_v];
3568  H |= (L >> 4) << 8;
3569  E |= s->filter_lut.mblim_lut[L] << 8;
3570  I |= s->filter_lut.lim_lut[L] << 8;
3571  s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3572  } else {
3573  s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3574  }
3575  } else if (hm23 & x) {
3576  int L = l[8 << ss_v], H = L >> 4;
3577  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3578 
3579  s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
3580  }
3581  l++;
3582  }
3583  }
3584  }
3585 }
3586 
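 /* Editorial note (not in the upstream source): the row-edge counterpart
  * of filter_plane_cols(). Here x advances by 2 bit positions per step
  * (4 with horizontal subsampling, per "x <<= (2 << ss_h)"), since each
  * 16-pixel stride covers two 8x8 mask columns. */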
3587 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3588  uint8_t *lvl, uint8_t (*mask)[4],
3589  uint8_t *dst, ptrdiff_t ls)
3590 {
3591  int y, x, bytesperpixel = s->bytesperpixel;
3592 
3593  // block1
3594  // filter edges between rows (e.g. ------)
3595  // block2
3596  for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3597  uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3598  unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3599 
3600  for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3601  if (row || y) {
3602  if (vm & x) {
3603  int L = *l, H = L >> 4;
3604  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3605 
3606  if (vmask[0] & x) {
3607  if (vmask[0] & (x << (1 + ss_h))) {
3608  av_assert2(l[1 + ss_h] == L);
3609  s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3610  } else {
3611  s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3612  }
3613  } else if (vm & (x << (1 + ss_h))) {
3614  L = l[1 + ss_h];
3615  H |= (L >> 4) << 8;
3616  E |= s->filter_lut.mblim_lut[L] << 8;
3617  I |= s->filter_lut.lim_lut[L] << 8;
3618  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3619  [!!(vmask[1] & (x << (1 + ss_h)))]
3620  [1](ptr, ls, E, I, H);
3621  } else {
3622  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3623  [1](ptr, ls, E, I, H);
3624  }
3625  } else if (vm & (x << (1 + ss_h))) {
3626  int L = l[1 + ss_h], H = L >> 4;
3627  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3628 
3629  s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3630  [1](ptr + 8 * bytesperpixel, ls, E, I, H);
3631  }
3632  }
3633  if (!ss_v) {
3634  if (vm3 & x) {
3635  int L = *l, H = L >> 4;
3636  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3637 
3638  if (vm3 & (x << (1 + ss_h))) {
3639  L = l[1 + ss_h];
3640  H |= (L >> 4) << 8;
3641  E |= s->filter_lut.mblim_lut[L] << 8;
3642  I |= s->filter_lut.lim_lut[L] << 8;
3643  s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3644  } else {
3645  s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3646  }
3647  } else if (vm3 & (x << (1 + ss_h))) {
3648  int L = l[1 + ss_h], H = L >> 4;
3649  int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3650 
3651  s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
3652  }
3653  }
3654  }
3655  if (ss_v) {
3656  if (y & 1)
3657  lvl += 16;
3658  } else {
3659  lvl += 8;
3660  }
3661  }
3662 }
3663 
3664 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3665  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3666 {
3667  VP9Context *s = ctx->priv_data;
3668  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3669  uint8_t *dst = f->data[0] + yoff;
3670  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3671  uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3672  int p;
3673 
 3674  // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
 3675  // if you think of them as acting on an 8x8 block max, we could interleave
 3676  // each v/h pair within the single x loop, but that only works if we
 3677  // operate on 8-pixel blocks, and we won't always do that (we want at
 3678  // least 16px to use SSE2 optimizations, perhaps 32 for AVX2)
3679 
3680  filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3681  filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3682 
3683  for (p = 0; p < 2; p++) {
3684  dst = f->data[1 + p] + uvoff;
3685  filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3686  filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
3687  }
3688 }
3689 
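 /* Editorial note (not in the upstream source): tiles divide the frame
  * evenly in superblock units, and << 3 converts superblocks into
  * 8x8-block rows/cols. Worked example, assuming n = 10 superblocks and
  * log2_n = 2 (i.e. 4 tiles): idx = 1 gives sb_start = 2, sb_end = 5,
  * so *start = 16 and *end = 40. */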
3690 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3691 {
3692  int sb_start = ( idx * n) >> log2_n;
3693  int sb_end = ((idx + 1) * n) >> log2_n;
3694  *start = FFMIN(sb_start, n) << 3;
3695  *end = FFMIN(sb_end, n) << 3;
3696 }
3697 
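 /* Editorial note (not in the upstream source): backward adaptation of
  * one binary probability. p2 is the rounded empirical probability of
  * branch 0 (ct0 / ct, scaled to 1..255) and *p moves from the old p1
  * towards p2 by update_factor / 256, with update_factor first scaled
  * down when fewer than max_count events were seen. */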
3698 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3699  int max_count, int update_factor)
3700 {
3701  unsigned ct = ct0 + ct1, p2, p1;
3702 
3703  if (!ct)
3704  return;
3705 
3706  p1 = *p;
3707  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3708  p2 = av_clip(p2, 1, 255);
3709  ct = FFMIN(ct, max_count);
3710  update_factor = FASTDIV(update_factor * ct, max_count);
3711 
3712  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3713  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3714 }
3715 
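 /* Editorial note (not in the upstream source): called once per frame
  * (unless parallel/error-resilient mode suppresses it) to fold the
  * counts gathered while decoding back into the frame context selected
  * by framectxid. Coefficient probabilities adapt with factor 128 only
  * on an inter frame that directly follows a keyframe, and with the
  * slower 112 otherwise; every probability tree is updated leaf by leaf
  * via adapt_prob(). */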
3716 static void adapt_probs(VP9Context *s)
3717 {
3718  int i, j, k, l, m;
3719  prob_context *p = &s->prob_ctx[s->s.h.framectxid].p;
3720  int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
3721 
3722  // coefficients
3723  for (i = 0; i < 4; i++)
3724  for (j = 0; j < 2; j++)
3725  for (k = 0; k < 2; k++)
3726  for (l = 0; l < 6; l++)
3727  for (m = 0; m < 6; m++) {
3728  uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
3729  unsigned *e = s->counts.eob[i][j][k][l][m];
3730  unsigned *c = s->counts.coef[i][j][k][l][m];
3731 
 3732  if (l == 0 && m >= 3) // dc band only has 3 contexts
3733  break;
3734 
3735  adapt_prob(&pp[0], e[0], e[1], 24, uf);
3736  adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3737  adapt_prob(&pp[2], c[1], c[2], 24, uf);
3738  }
3739 
3740  if (s->s.h.keyframe || s->s.h.intraonly) {
3741  memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3742  memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3743  memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3744  memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3745  return;
3746  }
3747 
3748  // skip flag
3749  for (i = 0; i < 3; i++)
3750  adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3751 
3752  // intra/inter flag
3753  for (i = 0; i < 4; i++)
3754  adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3755 
3756  // comppred flag
3757  if (s->s.h.comppredmode == PRED_SWITCHABLE) {
3758  for (i = 0; i < 5; i++)
3759  adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3760  }
3761 
3762  // reference frames
3763  if (s->s.h.comppredmode != PRED_SINGLEREF) {
3764  for (i = 0; i < 5; i++)
3765  adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3766  s->counts.comp_ref[i][1], 20, 128);
3767  }
3768 
3769  if (s->s.h.comppredmode != PRED_COMPREF) {
3770  for (i = 0; i < 5; i++) {
3771  uint8_t *pp = p->single_ref[i];
3772  unsigned (*c)[2] = s->counts.single_ref[i];
3773 
3774  adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3775  adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3776  }
3777  }
3778 
3779  // block partitioning
3780  for (i = 0; i < 4; i++)
3781  for (j = 0; j < 4; j++) {
3782  uint8_t *pp = p->partition[i][j];
3783  unsigned *c = s->counts.partition[i][j];
3784 
3785  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3786  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3787  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3788  }
3789 
3790  // tx size
3791  if (s->s.h.txfmmode == TX_SWITCHABLE) {
3792  for (i = 0; i < 2; i++) {
3793  unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3794 
3795  adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3796  adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3797  adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3798  adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3799  adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3800  adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3801  }
3802  }
3803 
3804  // interpolation filter
3805  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
3806  for (i = 0; i < 4; i++) {
3807  uint8_t *pp = p->filter[i];
3808  unsigned *c = s->counts.filter[i];
3809 
3810  adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3811  adapt_prob(&pp[1], c[1], c[2], 20, 128);
3812  }
3813  }
3814 
3815  // inter modes
3816  for (i = 0; i < 7; i++) {
3817  uint8_t *pp = p->mv_mode[i];
3818  unsigned *c = s->counts.mv_mode[i];
3819 
3820  adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3821  adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3822  adapt_prob(&pp[2], c[1], c[3], 20, 128);
3823  }
3824 
3825  // mv joints
3826  {
3827  uint8_t *pp = p->mv_joint;
3828  unsigned *c = s->counts.mv_joint;
3829 
3830  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3831  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3832  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3833  }
3834 
3835  // mv components
3836  for (i = 0; i < 2; i++) {
3837  uint8_t *pp;
3838  unsigned *c, (*c2)[2], sum;
3839 
3840  adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3841  s->counts.mv_comp[i].sign[1], 20, 128);
3842 
3843  pp = p->mv_comp[i].classes;
3844  c = s->counts.mv_comp[i].classes;
3845  sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3846  adapt_prob(&pp[0], c[0], sum, 20, 128);
3847  sum -= c[1];
3848  adapt_prob(&pp[1], c[1], sum, 20, 128);
3849  sum -= c[2] + c[3];
3850  adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3851  adapt_prob(&pp[3], c[2], c[3], 20, 128);
3852  sum -= c[4] + c[5];
3853  adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3854  adapt_prob(&pp[5], c[4], c[5], 20, 128);
3855  sum -= c[6];
3856  adapt_prob(&pp[6], c[6], sum, 20, 128);
3857  adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3858  adapt_prob(&pp[8], c[7], c[8], 20, 128);
3859  adapt_prob(&pp[9], c[9], c[10], 20, 128);
3860 
3861  adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3862  s->counts.mv_comp[i].class0[1], 20, 128);
3863  pp = p->mv_comp[i].bits;
3864  c2 = s->counts.mv_comp[i].bits;
3865  for (j = 0; j < 10; j++)
3866  adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3867 
3868  for (j = 0; j < 2; j++) {
3869  pp = p->mv_comp[i].class0_fp[j];
3870  c = s->counts.mv_comp[i].class0_fp[j];
3871  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3872  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3873  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3874  }
3875  pp = p->mv_comp[i].fp;
3876  c = s->counts.mv_comp[i].fp;
3877  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3878  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3879  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3880 
3881  if (s->s.h.highprecisionmvs) {
3882  adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3883  s->counts.mv_comp[i].class0_hp[1], 20, 128);
3884  adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3885  s->counts.mv_comp[i].hp[1], 20, 128);
3886  }
3887  }
3888 
3889  // y intra modes
3890  for (i = 0; i < 4; i++) {
3891  uint8_t *pp = p->y_mode[i];
3892  unsigned *c = s->counts.y_mode[i], sum, s2;
3893 
3894  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3895  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3896  sum -= c[TM_VP8_PRED];
3897  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3898  sum -= c[VERT_PRED];
3899  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3900  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3901  sum -= s2;
3902  adapt_prob(&pp[3], s2, sum, 20, 128);
3903  s2 -= c[HOR_PRED];
3904  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3905  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3906  sum -= c[DIAG_DOWN_LEFT_PRED];
3907  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3908  sum -= c[VERT_LEFT_PRED];
3909  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3910  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3911  }
3912 
3913  // uv intra modes
3914  for (i = 0; i < 10; i++) {
3915  uint8_t *pp = p->uv_mode[i];
3916  unsigned *c = s->counts.uv_mode[i], sum, s2;
3917 
3918  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3919  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3920  sum -= c[TM_VP8_PRED];
3921  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3922  sum -= c[VERT_PRED];
3923  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3924  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3925  sum -= s2;
3926  adapt_prob(&pp[3], s2, sum, 20, 128);
3927  s2 -= c[HOR_PRED];
3928  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3929  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3930  sum -= c[DIAG_DOWN_LEFT_PRED];
3931  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3932  sum -= c[VERT_LEFT_PRED];
3933  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3934  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3935  }
3936 }
3937 
3938 static void free_buffers(VP9Context *s)
3939 {
3940  av_freep(&s->intra_pred_data[0]);
3941  av_freep(&s->b_base);
3942  av_freep(&s->block_base);
3943 }
3944 
 3945 static av_cold int vp9_decode_free(AVCodecContext *ctx)
 3946 {
3947  VP9Context *s = ctx->priv_data;
3948  int i;
3949 
3950  for (i = 0; i < 3; i++) {
3951  if (s->s.frames[i].tf.f->buf[0])
3952  vp9_unref_frame(ctx, &s->s.frames[i]);
3953  av_frame_free(&s->s.frames[i].tf.f);
3954  }
3955  for (i = 0; i < 8; i++) {
3956  if (s->s.refs[i].f->buf[0])
3957  ff_thread_release_buffer(ctx, &s->s.refs[i]);
3958  av_frame_free(&s->s.refs[i].f);
3959  if (s->next_refs[i].f->buf[0])
3960  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3961  av_frame_free(&s->next_refs[i].f);
3962  }
3963  free_buffers(s);
3964  av_freep(&s->c_b);
3965  s->c_b_size = 0;
3966 
3967  return 0;
3968 }
3969 
3970 
 3971 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
 3972  int *got_frame, AVPacket *pkt)
3973 {
3974  const uint8_t *data = pkt->data;
3975  int size = pkt->size;
3976  VP9Context *s = ctx->priv_data;
3977  int res, tile_row, tile_col, i, ref, row, col;
 3978  int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
 3979  (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
3980  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3981  AVFrame *f;
3982  int bytesperpixel;
3983 
3984  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3985  return res;
3986  } else if (res == 0) {
3987  if (!s->s.refs[ref].f->buf[0]) {
3988  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3989  return AVERROR_INVALIDDATA;
3990  }
3991  if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
3992  return res;
3993  ((AVFrame *)frame)->pkt_pts = pkt->pts;
3994  ((AVFrame *)frame)->pkt_dts = pkt->dts;
3995  for (i = 0; i < 8; i++) {
3996  if (s->next_refs[i].f->buf[0])
3997  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3998  if (s->s.refs[i].f->buf[0] &&
3999  (res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
4000  return res;
4001  }
4002  *got_frame = 1;
4003  return pkt->size;
4004  }
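 /* Editorial note (not in the upstream source): a zero return from
  * decode_frame_header() marks a "show existing frame" packet; the
  * branch above just re-outputs reference frame ref with the packet's
  * timestamps and carries the reference slots over unchanged. */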
4005  data += res;
4006  size -= res;
4007 
4008  if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
 4009  if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
 4010  vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
4011  if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
4012  (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
4013  return res;
4014  }
 4015  if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
 4016  vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR]);
4017  if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
4018  (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
4019  return res;
4020  if (s->s.frames[CUR_FRAME].tf.f->buf[0])
4021  vp9_unref_frame(ctx, &s->s.frames[CUR_FRAME]);
4022  if ((res = vp9_alloc_frame(ctx, &s->s.frames[CUR_FRAME])) < 0)
4023  return res;
4024  f = s->s.frames[CUR_FRAME].tf.f;
 4025  f->key_frame = s->s.h.keyframe;
 4026  f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4027  ls_y = f->linesize[0];
 4028  ls_uv = f->linesize[1];
4029 
 4030  if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
 4031  (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
 4032  s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
 4033  vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
 4034  }
4035 
4036  // ref frame setup
4037  for (i = 0; i < 8; i++) {
4038  if (s->next_refs[i].f->buf[0])
4039  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4040  if (s->s.h.refreshrefmask & (1 << i)) {
4041  res = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
4042  } else if (s->s.refs[i].f->buf[0]) {
4043  res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
4044  }
4045  if (res < 0)
4046  return res;
4047  }
4048 
4049  if (ctx->hwaccel) {
4050  res = ctx->hwaccel->start_frame(ctx, NULL, 0);
4051  if (res < 0)
4052  return res;
4053  res = ctx->hwaccel->decode_slice(ctx, pkt->data, pkt->size);
4054  if (res < 0)
4055  return res;
4056  res = ctx->hwaccel->end_frame(ctx);
4057  if (res < 0)
4058  return res;
4059  goto finish;
4060  }
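 /* Editorial note (not in the upstream source): with a hwaccel attached
  * the whole packet is submitted to the accelerator above, and the
  * software tile-decode / loopfilter path below is bypassed through the
  * "finish" label. */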
4061 
4062  // main tile decode loop
4063  bytesperpixel = s->bytesperpixel;
4064  memset(s->above_partition_ctx, 0, s->cols);
4065  memset(s->above_skip_ctx, 0, s->cols);
4066  if (s->s.h.keyframe || s->s.h.intraonly) {
4067  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4068  } else {
4069  memset(s->above_mode_ctx, NEARESTMV, s->cols);
4070  }
4071  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4072  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4073  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4074  memset(s->above_segpred_ctx, 0, s->cols);
 4075  s->pass = s->s.frames[CUR_FRAME].uses_2pass =
 4076  ctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
4077  if ((res = update_block_buffers(ctx)) < 0) {
4078  av_log(ctx, AV_LOG_ERROR,
4079  "Failed to allocate block buffers\n");
4080  return res;
4081  }
4082  if (s->s.h.refreshctx && s->s.h.parallelmode) {
4083  int j, k, l, m;
4084 
4085  for (i = 0; i < 4; i++) {
4086  for (j = 0; j < 2; j++)
4087  for (k = 0; k < 2; k++)
4088  for (l = 0; l < 6; l++)
4089  for (m = 0; m < 6; m++)
4090  memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
4091  s->prob.coef[i][j][k][l][m], 3);
4092  if (s->s.h.txfmmode == i)
4093  break;
4094  }
 4095  s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
 4096  ff_thread_finish_setup(ctx);
 4097  } else if (!s->s.h.refreshctx) {
 4098  ff_thread_finish_setup(ctx);
 4099  }
4100 
4101  do {
4102  yoff = uvoff = 0;
4103  s->b = s->b_base;
4104  s->block = s->block_base;
4105  s->uvblock[0] = s->uvblock_base[0];
4106  s->uvblock[1] = s->uvblock_base[1];
4107  s->eob = s->eob_base;
4108  s->uveob[0] = s->uveob_base[0];
4109  s->uveob[1] = s->uveob_base[1];
4110 
4111  for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
 4112  set_tile_offset(&s->tile_row_start, &s->tile_row_end,
 4113  tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
4114  if (s->pass != 2) {
4115  for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
4116  int64_t tile_size;
4117 
4118  if (tile_col == s->s.h.tiling.tile_cols - 1 &&
4119  tile_row == s->s.h.tiling.tile_rows - 1) {
4120  tile_size = size;
4121  } else {
4122  tile_size = AV_RB32(data);
4123  data += 4;
4124  size -= 4;
4125  }
4126  if (tile_size > size) {
4127  ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4128  return AVERROR_INVALIDDATA;
4129  }
4130  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4131  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4132  ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4133  return AVERROR_INVALIDDATA;
4134  }
4135  data += tile_size;
4136  size -= tile_size;
4137  }
4138  }
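 /* Editorial note (not in the upstream source): every tile except the
  * last one of the last tile row is prefixed with a 32-bit big-endian
  * size; the final tile consumes the remaining packet bytes. A separate
  * range decoder (s->c_b[tile_col]) is set up per tile column, so each
  * column keeps independent bitstream state across superblock rows. */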
4139 
4140  for (row = s->tile_row_start; row < s->tile_row_end;
4141  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4142  struct VP9Filter *lflvl_ptr = s->lflvl;
4143  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4144 
4145  for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
 4146  set_tile_offset(&s->tile_col_start, &s->tile_col_end,
 4147  tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
4148 
4149  if (s->pass != 2) {
4150  memset(s->left_partition_ctx, 0, 8);
4151  memset(s->left_skip_ctx, 0, 8);
4152  if (s->s.h.keyframe || s->s.h.intraonly) {
4153  memset(s->left_mode_ctx, DC_PRED, 16);
4154  } else {
4155  memset(s->left_mode_ctx, NEARESTMV, 8);
4156  }
4157  memset(s->left_y_nnz_ctx, 0, 16);
4158  memset(s->left_uv_nnz_ctx, 0, 32);
4159  memset(s->left_segpred_ctx, 0, 8);
4160 
4161  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4162  }
4163 
4164  for (col = s->tile_col_start;
4165  col < s->tile_col_end;
4166  col += 8, yoff2 += 64 * bytesperpixel,
4167  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
 4168  // FIXME integrate with lf code (i.e. zero after each
 4169  // use, as is done for the invtxfm coefficients)
4170  if (s->pass != 1) {
4171  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4172  }
4173 
4174  if (s->pass == 2) {
4175  decode_sb_mem(ctx, row, col, lflvl_ptr,
4176  yoff2, uvoff2, BL_64X64);
4177  } else {
4178  decode_sb(ctx, row, col, lflvl_ptr,
4179  yoff2, uvoff2, BL_64X64);
4180  }
4181  }
4182  if (s->pass != 2) {
4183  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4184  }
4185  }
4186 
4187  if (s->pass == 1) {
4188  continue;
4189  }
4190 
 4191  // back up pre-loopfilter reconstruction data for intra
 4192  // prediction of the next row of sb64s
4193  if (row + 8 < s->rows) {
4194  memcpy(s->intra_pred_data[0],
4195  f->data[0] + yoff + 63 * ls_y,
4196  8 * s->cols * bytesperpixel);
4197  memcpy(s->intra_pred_data[1],
4198  f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4199  8 * s->cols * bytesperpixel >> s->ss_h);
4200  memcpy(s->intra_pred_data[2],
4201  f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4202  8 * s->cols * bytesperpixel >> s->ss_h);
4203  }
4204 
4205  // loopfilter one row
4206  if (s->s.h.filter.level) {
4207  yoff2 = yoff;
4208  uvoff2 = uvoff;
4209  lflvl_ptr = s->lflvl;
4210  for (col = 0; col < s->cols;
4211  col += 8, yoff2 += 64 * bytesperpixel,
4212  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4213  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4214  }
4215  }
4216 
 4217  // FIXME maybe we can make this more fine-grained by running the
 4218  // loopfilter per-block instead of after each sbrow.
 4219  // In fact that would also make intra pred left preparation easier?
4220  ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
4221  }
4222  }
4223 
4224  if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
 4225  adapt_probs(s);
 4226  ff_thread_finish_setup(ctx);
 4227  }
4228  } while (s->pass++ == 1);
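 /* Editorial note (not in the upstream source): in frame-threaded
  * 2-pass mode s->pass starts at 1, so the loop runs twice (pass 1
  * parses and stores block structure, pass 2 reconstructs); in
  * single-pass mode s->pass is 0 and the post-increment test fails
  * after the first iteration. */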
4229  ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4230 
4231 finish:
4232  // ref frame setup
4233  for (i = 0; i < 8; i++) {
4234  if (s->s.refs[i].f->buf[0])
4235  ff_thread_release_buffer(ctx, &s->s.refs[i]);
4236  if (s->next_refs[i].f->buf[0] &&
4237  (res = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
4238  return res;
4239  }
4240 
4241  if (!s->s.h.invisible) {
4242  if ((res = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
4243  return res;
4244  *got_frame = 1;
4245  }
4246 
4247  return pkt->size;
4248 }
4249 
 4250 static void vp9_decode_flush(AVCodecContext *ctx)
 4251 {
4252  VP9Context *s = ctx->priv_data;
4253  int i;
4254 
4255  for (i = 0; i < 3; i++)
4256  vp9_unref_frame(ctx, &s->s.frames[i]);
4257  for (i = 0; i < 8; i++)
4258  ff_thread_release_buffer(ctx, &s->s.refs[i]);
4259 }
4260 
 4261 static int init_frames(AVCodecContext *ctx)
 4262 {
4263  VP9Context *s = ctx->priv_data;
4264  int i;
4265 
4266  for (i = 0; i < 3; i++) {
4267  s->s.frames[i].tf.f = av_frame_alloc();
4268  if (!s->s.frames[i].tf.f) {
4269  vp9_decode_free(ctx);
4270  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4271  return AVERROR(ENOMEM);
4272  }
4273  }
4274  for (i = 0; i < 8; i++) {
4275  s->s.refs[i].f = av_frame_alloc();
4276  s->next_refs[i].f = av_frame_alloc();
4277  if (!s->s.refs[i].f || !s->next_refs[i].f) {
4278  vp9_decode_free(ctx);
4279  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4280  return AVERROR(ENOMEM);
4281  }
4282  }
4283 
4284  return 0;
4285 }
4286 
 4287 static av_cold int vp9_decode_init(AVCodecContext *ctx)
 4288 {
4289  VP9Context *s = ctx->priv_data;
4290 
4291  ctx->internal->allocate_progress = 1;
4292  s->last_bpp = 0;
4293  s->s.h.filter.sharpness = -1;
4294 
4295  return init_frames(ctx);
4296 }
4297 
4298 #if HAVE_THREADS
4299 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4300 {
4301  return init_frames(avctx);
4302 }
4303 
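 /* Editorial note (not in the upstream source): frame-threading hook.
  * Each worker thread owns a private VP9Context; before a new thread
  * starts its frame, this copies over the reference frames plus the
  * header and probability state that parsing the next frame's header
  * depends on. */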
4304 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4305 {
4306  int i, res;
4307  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4308 
4309  for (i = 0; i < 3; i++) {
4310  if (s->s.frames[i].tf.f->buf[0])
4311  vp9_unref_frame(dst, &s->s.frames[i]);
4312  if (ssrc->s.frames[i].tf.f->buf[0]) {
4313  if ((res = vp9_ref_frame(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
4314  return res;
4315  }
4316  }
4317  for (i = 0; i < 8; i++) {
4318  if (s->s.refs[i].f->buf[0])
4319  ff_thread_release_buffer(dst, &s->s.refs[i]);
4320  if (ssrc->next_refs[i].f->buf[0]) {
4321  if ((res = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
4322  return res;
4323  }
4324  }
4325 
4326  s->s.h.invisible = ssrc->s.h.invisible;
4327  s->s.h.keyframe = ssrc->s.h.keyframe;
4328  s->s.h.intraonly = ssrc->s.h.intraonly;
4329  s->ss_v = ssrc->ss_v;
4330  s->ss_h = ssrc->ss_h;
4331  s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
4332  s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
4333  s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
4334  s->bytesperpixel = ssrc->bytesperpixel;
4335  s->gf_fmt = ssrc->gf_fmt;
4336  s->w = ssrc->w;
4337  s->h = ssrc->h;
4338  s->bpp = ssrc->bpp;
4339  s->bpp_index = ssrc->bpp_index;
4340  s->pix_fmt = ssrc->pix_fmt;
4341  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4342  memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
4343  memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
4344  sizeof(s->s.h.segmentation.feat));
4345 
4346  return 0;
4347 }
4348 #endif
4349 
 4350 AVCodec ff_vp9_decoder = {
 4351  .name = "vp9",
4352  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4353  .type = AVMEDIA_TYPE_VIDEO,
4354  .id = AV_CODEC_ID_VP9,
4355  .priv_data_size = sizeof(VP9Context),
4356  .init = vp9_decode_init,
 4357  .close = vp9_decode_free,
 4358  .decode = vp9_decode_frame,
 4359  .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 4360  .flush = vp9_decode_flush,
 4361  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
 4362  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
 4363  .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
 4364 };
Definition: vp9.c:162
Definition: vp9.h:60
enum AVPixelFormat pix_fmt last_fmt gf_fmt
Definition: vp9.c:78
AVCodec ff_vp9_decoder
Definition: vp9.c:4350
unsigned sb_rows
Definition: vp9.c:79
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:92
struct VP9BitstreamHeader::@116 lf_delta
unsigned properties
Definition: avcodec.h:3345
static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
Definition: vp56.h:304
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9.c:2831
static const int16_t vp9_dc_qlookup[3][256]
Definition: vp9data.h:225
Core video DSP helper functions.
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, ptrdiff_t stride, int v)
Definition: vp9.c:1378
static const int8_t vp9_mv_joint_tree[3][2]
Definition: vp9data.h:2252
enum BlockLevel bl
Definition: vp9.c:51
void * priv_data
Definition: avcodec.h:1574
unsigned bits[10][2]
Definition: vp9.c:113
#define t4
Definition: regdef.h:32
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
Definition: vp9.c:1252
#define HWACCEL_MAX
#define av_free(p)
#define memset_val(c, val, num)
unsigned tile_row_end
Definition: vp9.c:86
int frame_priv_data_size
Size of per-frame hardware accelerator private data.
Definition: avcodec.h:3594
struct AVCodecInternal * internal
Private context used for internal data.
Definition: avcodec.h:1582
enum FilterMode filter
Definition: vp9.c:47
static int decode012(GetBitContext *gb)
Definition: get_bits.h:589
int key_frame
1 -> keyframe, 0-> not
Definition: frame.h:247
#define AV_ZERO32(d)
Definition: intreadwrite.h:614
static void decode_b(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9.c:3184
uint8_t edge_emu_buffer[135 *144 *2]
Definition: vp9.c:154
Definition: vp9.h:67
uint8_t bits[10]
Definition: vp9data.h:1447
static const uint8_t * align_get_bits(GetBitContext *s)
Definition: get_bits.h:457
#define AV_RN16A(p)
Definition: intreadwrite.h:522
#define DECODE_UV_COEF_LOOP(step, v)
uint8_t invisible
Definition: vp9.h:141
struct VP9Context::@109 filter_lut
int64_t dts
Decompression timestamp in AVStream->time_base units; the time at which the packet is decompressed...
Definition: avcodec.h:1466
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1, int max_count, int update_factor)
Definition: vp9.c:3698
uint8_t use_last_frame_mvs
Definition: vp9.h:152
static const int16_t(*const [5][4] vp9_scans_nb)[2]
Definition: vp9data.h:1151
int height
Definition: frame.h:230
ThreadFrame refs[8]
Definition: vp9.h:204
uint8_t pred_prob[3]
Definition: vp9.h:177
#define av_freep(p)
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:101
VP9Block * b_base
Definition: vp9.c:64
void INT64 start
Definition: avisynth_c.h:553
Definition: vp9.h:126
#define av_always_inline
Definition: attributes.h:39
HW decoding through Direct3D11, Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer...
Definition: pixfmt.h:243
VP56mv mv[2]
Definition: vp9.h:122
static int update_prob(VP56RangeCoder *c, int p)
Definition: vp9.c:393
#define av_malloc_array(a, b)
ptrdiff_t y_stride
Definition: vp9.c:68
Definition: vp9.h:56
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2078
#define stride
uint8_t skip
Definition: vp9.c:46
const AVProfile ff_vp9_profiles[]
Definition: profiles.c:124
uint8_t * above_ref_ctx
Definition: vp9.c:147
uint8_t mv_joint[3]
Definition: vp9data.h:1442
int(* decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size)
Callback for each slice.
Definition: avcodec.h:3574
int8_t ref[4]
Definition: vp9.h:165
uint8_t left_intra_ctx[8]
Definition: vp9.c:133
#define AV_RN64A(p)
Definition: intreadwrite.h:530
unsigned classes[11]
Definition: vp9.c:111
int(* end_frame)(AVCodecContext *avctx)
Called at the end of each frame or field picture.
Definition: avcodec.h:3585
uint8_t highprecisionmvs
Definition: vp9.h:146
AVPixelFormat
Pixel format.
Definition: pixfmt.h:61
This structure stores compressed data.
Definition: avcodec.h:1444
uint8_t lossless
Definition: vp9.h:169
#define AV_GET_BUFFER_FLAG_REF
The decoder will keep a reference to the frame and may reuse it later.
Definition: avcodec.h:1241
uint8_t * above_comp_ctx
Definition: vp9.c:146
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:252
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
Definition: avcodec.h:856
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1460
#define RETURN_SCALE_MV(mv, scale)
for(j=16;j >0;--j)
int block_alloc_using_2pass
Definition: vp9.c:157
Predicted.
Definition: avutil.h:267
int compressed_header_size
Definition: vp9.h:198
struct VP9BitstreamHeader::@117::@119 feat[MAX_SEGMENT]
static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2273
uint8_t refreshrefmask
Definition: vp9.h:145
static int read_colorspace_details(AVCodecContext *ctx)
Definition: vp9.c:451
static const int8_t vp9_partition_tree[3][2]
Definition: vp9data.h:29
static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9.c:2651
VP56RangeCoder c
Definition: vp9.c:61