FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vp8.c
Go to the documentation of this file.
1 /*
2  * VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Jason Garrett-Glaser
7  * Copyright (C) 2012 Daniel Kang
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37 
/* Free the decoder's scratch buffers: per-thread data, the top-row
 * non-zero-count cache and the top border cache, then clear the
 * macroblock pointer (which aliases into macroblocks_base).
 * NOTE(review): this listing is scraped from a Doxygen page; original
 * lines 43-44 (the per-thread cleanup inside the loop) and 47-48 are
 * missing here — confirm against the upstream file. */
38 static void free_buffers(VP8Context *s)
39 {
40  int i;
41  if (s->thread_data)
42  for (i = 0; i < MAX_THREADS; i++) {
/* NOTE(review): loop body (original lines 43-44) missing from this listing */
45  }
46  av_freep(&s->thread_data);
/* NOTE(review): original lines 47-48 missing — presumably further av_freep() calls; verify upstream */
49  av_freep(&s->top_nnz);
50  av_freep(&s->top_border);
51 
52  s->macroblocks = NULL;
53 }
54 
/* Allocate a frame via the threading-aware buffer API and attach a
 * segmentation map to ref_index[0] (one byte per macroblock): a cached
 * map is reused when available, otherwise a fresh zeroed one is made.
 * NOTE(review): the function signature (original line 55) is missing
 * from this scraped listing, as are lines 61 and 63 (map-reuse body and
 * error log); presumably this is vp8_alloc_frame — confirm upstream. */
56 {
57  int ret;
58  if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
59  return ret;
60  if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
/* NOTE(review): original line 61 missing — presumably pops a queued map into f->ref_index[0]; verify upstream */
62  } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
64  return AVERROR(ENOMEM);
65  }
66  return 0;
67 }
68 
/* Release a frame and dispose of its segmentation map, either by
 * queueing it for delayed free (other threads may still read it) or by
 * freeing it directly; see the in-body comment for the policy.
 * NOTE(review): original line 79 (queueing the map into
 * s->segmentation_maps) and line 88 (presumably the
 * ff_thread_release_buffer call) are missing from this listing. */
69 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
70 {
71  if (f->ref_index[0]) {
72  if (prefer_delayed_free) {
73  /* Upon a size change, we want to free the maps but other threads may still
74  * be using them, so queue them. Upon a seek, all threads are inactive so
75  * we want to cache one to prevent re-allocation in the next decoding
76  * iteration, but the rest we can free directly. */
77  int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78  if (s->num_maps_to_be_freed < max_queued_maps) {
/* NOTE(review): original line 79 missing — presumably stores the map in the to-be-freed queue; verify upstream */
80  } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81  av_free(f->ref_index[0]);
82  } /* else: MEMLEAK (should never happen, but better that than crash) */
83  f->ref_index[0] = NULL;
84  } else /* vp8_decode_free() */ {
85  av_free(f->ref_index[0]);
86  }
87  }
/* NOTE(review): original line 88 missing — presumably releases the frame buffer itself; verify upstream */
89 }
90 
/* Core flush routine: release all five internal frames (unless this
 * context is a thread copy), clear the frame-pointer table, and
 * optionally free all scratch buffers (free_mem, used on size change
 * and on close). The delayed-free flags are forwarded to
 * vp8_release_frame(); see the policy comment there.
 * NOTE(review): the first signature line (original line 91, carrying
 * the function name — presumably vp8_decode_flush_impl given the
 * callers below) is missing from this scraped listing. */
92  int prefer_delayed_free, int can_direct_free, int free_mem)
93 {
94  VP8Context *s = avctx->priv_data;
95  int i;
96 
97  if (!avctx->internal->is_copy) {
98  for (i = 0; i < 5; i++)
99  if (s->frames[i].data[0])
100  vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
101  }
102  memset(s->framep, 0, sizeof(s->framep));
103 
104  if (free_mem) {
105  free_buffers(s);
106  s->maps_are_invalid = 1;
107  }
108 }
109 
110 static void vp8_decode_flush(AVCodecContext *avctx)
111 {
112  vp8_decode_flush_impl(avctx, 1, 1, 0);
113 }
114 
/* (Re)allocate all per-dimension decoder state. On an actual size
 * change the old state is flushed and freed first, then macroblock
 * arrays, top-row caches and per-thread data are allocated according
 * to the threading layout. Returns 0 or a negative AVERROR code.
 * NOTE(review): original lines 133 (mb_layout selection), 136, 142 and
 * 145 (thread_data and related allocations) are missing from this
 * scraped listing — the allocation set below is incomplete. */
115 static int update_dimensions(VP8Context *s, int width, int height)
116 {
117  AVCodecContext *avctx = s->avctx;
118  int i;
119 
120  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
121  height != s->avctx->height) {
122  if (av_image_check_size(width, height, 0, s->avctx))
123  return AVERROR_INVALIDDATA;
124 
125  vp8_decode_flush_impl(s->avctx, 1, 0, 1);
126 
127  avcodec_set_dimensions(s->avctx, width, height);
128  }
129 
130  s->mb_width = (s->avctx->coded_width +15) / 16;
131  s->mb_height = (s->avctx->coded_height+15) / 16;
132 
/* NOTE(review): original line 133 missing — presumably sets s->mb_layout from the threading mode; verify upstream */
134  if (!s->mb_layout) { // Frame threading and one thread
135  s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
/* NOTE(review): original line 136 missing — presumably allocates s->intra4x4_pred_mode_top (checked at line 153); verify upstream */
137  }
138  else // Sliced threading
139  s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140  s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141  s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
/* NOTE(review): original line 142 missing — presumably allocates s->thread_data; verify upstream */
143 
144  for (i = 0; i < MAX_THREADS; i++) {
/* NOTE(review): original line 145 missing — presumably per-thread allocation; verify upstream */
146 #if HAVE_THREADS
147  pthread_mutex_init(&s->thread_data[i].lock, NULL);
148  pthread_cond_init(&s->thread_data[i].cond, NULL);
149 #endif
150  }
151 
152  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153  (!s->intra4x4_pred_mode_top && !s->mb_layout))
154  return AVERROR(ENOMEM);
155 
156  s->macroblocks = s->macroblocks_base + 1;
157 
158  return 0;
159 }
160 
/* Parse the segmentation header: optional per-segment feature data and
 * optional segment-id tree probabilities (255 = default when a
 * probability bit is absent).
 * NOTE(review): this scraped listing is missing the signature
 * (original line 161 — presumably update_segmentation(VP8Context *s),
 * given s->segmentation and the parse calls below) and lines 166, 169,
 * 172, 175 (the update_map/absolute_vals flags and the per-segment
 * quantizer / filter-level reads) — verify against upstream. */
162 {
163  VP56RangeCoder *c = &s->c;
164  int i;
165 
167 
168  if (vp8_rac_get(c)) { // update segment feature data
/* NOTE(review): original line 169 missing — presumably reads s->segmentation.absolute_vals; verify upstream */
170 
171  for (i = 0; i < 4; i++)
/* NOTE(review): original line 172 missing — presumably reads per-segment base_quant; verify upstream */
173 
174  for (i = 0; i < 4; i++)
/* NOTE(review): original line 175 missing — presumably reads per-segment filter level; verify upstream */
176  }
177  if (s->segmentation.update_map)
178  for (i = 0; i < 3; i++)
179  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
180 }
181 
/* Parse loop-filter delta updates: four signed 6-bit reference-frame
 * deltas followed by four signed 6-bit mode deltas, each guarded by a
 * presence bit and a sign bit (magnitude then sign).
 * NOTE(review): the signature line (original line 182) is missing from
 * this scraped listing; this is the update_lf_deltas(s) called from
 * the frame header parser below — verify upstream. */
183 {
184  VP56RangeCoder *c = &s->c;
185  int i;
186 
187  for (i = 0; i < 4; i++) {
188  if (vp8_rac_get(c)) {
189  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
190 
191  if (vp8_rac_get(c))
192  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
193  }
194  }
195 
196  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197  if (vp8_rac_get(c)) {
198  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
199 
200  if (vp8_rac_get(c))
201  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
202  }
203  }
204 }
205 
206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
207 {
208  const uint8_t *sizes = buf;
209  int i;
210 
211  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
212 
213  buf += 3*(s->num_coeff_partitions-1);
214  buf_size -= 3*(s->num_coeff_partitions-1);
215  if (buf_size < 0)
216  return -1;
217 
218  for (i = 0; i < s->num_coeff_partitions-1; i++) {
219  int size = AV_RL24(sizes + 3*i);
220  if (buf_size - size < 0)
221  return -1;
222 
223  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
224  buf += size;
225  buf_size -= size;
226  }
227  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
228 
229  return 0;
230 }
231 
232 static void get_quants(VP8Context *s)
233 {
234  VP56RangeCoder *c = &s->c;
235  int i, base_qi;
236 
237  int yac_qi = vp8_rac_get_uint(c, 7);
238  int ydc_delta = vp8_rac_get_sint(c, 4);
239  int y2dc_delta = vp8_rac_get_sint(c, 4);
240  int y2ac_delta = vp8_rac_get_sint(c, 4);
241  int uvdc_delta = vp8_rac_get_sint(c, 4);
242  int uvac_delta = vp8_rac_get_sint(c, 4);
243 
244  for (i = 0; i < 4; i++) {
245  if (s->segmentation.enabled) {
246  base_qi = s->segmentation.base_quant[i];
247  if (!s->segmentation.absolute_vals)
248  base_qi += yac_qi;
249  } else
250  base_qi = yac_qi;
251 
252  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254  s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255  /* 101581>>16 is equivalent to 155/100 */
256  s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
257  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
258  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
259 
260  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
261  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
262  }
263 }
264 
265 /**
266  * Determine which buffers golden and altref should be updated with after this frame.
267  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
268  *
269  * Intra frames update all 3 references
270  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
271  * If the update (golden|altref) flag is set, it's updated with the current frame
272  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
273  * If the flag is not set, the number read means:
274  * 0: no update
275  * 1: VP56_FRAME_PREVIOUS
276  * 2: update golden with altref, or update altref with golden
277  */
278 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
279 {
280  VP56RangeCoder *c = &s->c;
281 
282  if (update)
283  return VP56_FRAME_CURRENT;
284 
285  switch (vp8_rac_get_uint(c, 2)) {
286  case 1:
287  return VP56_FRAME_PREVIOUS;
288  case 2:
/* NOTE(review): the case-2 return (original line 289) is missing from this
 * scraped listing — per the comment above it swaps golden<->altref; verify
 * the exact expression upstream */
290  }
291  return VP56_FRAME_NONE;
292 }
293 
294 static void update_refs(VP8Context *s)
295 {
296  VP56RangeCoder *c = &s->c;
297 
298  int update_golden = vp8_rac_get(c);
299  int update_altref = vp8_rac_get(c);
300 
301  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
302  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
303 }
304 
/* Parse the uncompressed frame tag plus the first (compressed) header
 * partition: frame type/profile/size, keyframe start code and
 * dimensions, segmentation, loop filter, coefficient partitions,
 * quantizers, reference updates, and probability updates.
 * Returns 0 or a negative AVERROR code.
 * NOTE(review): this scraped listing is missing original lines 325,
 * 347, 370, 397, 412 and 436 — flagged inline; verify upstream. */
305 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
306 {
307  VP56RangeCoder *c = &s->c;
308  int header_size, hscale, vscale, i, j, k, l, m, ret;
309  int width = s->avctx->width;
310  int height = s->avctx->height;
311 
312  s->keyframe = !(buf[0] & 1);
313  s->profile = (buf[0]>>1) & 7;
314  s->invisible = !(buf[0] & 0x10);
315  header_size = AV_RL24(buf) >> 5;
316  buf += 3;
317  buf_size -= 3;
318 
319  if (s->profile > 3)
320  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
321 
322  if (!s->profile)
323  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
324  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
/* NOTE(review): original line 325 missing — presumably copies the bilinear pixel tab; verify upstream */
326 
327  if (header_size > buf_size - 7*s->keyframe) {
328  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
329  return AVERROR_INVALIDDATA;
330  }
331 
332  if (s->keyframe) {
333  if (AV_RL24(buf) != 0x2a019d) {
334  av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
335  return AVERROR_INVALIDDATA;
336  }
337  width = AV_RL16(buf+3) & 0x3fff;
338  height = AV_RL16(buf+5) & 0x3fff;
339  hscale = buf[4] >> 6;
340  vscale = buf[6] >> 6;
341  buf += 7;
342  buf_size -= 7;
343 
344  if (hscale || vscale)
345  av_log_missing_feature(s->avctx, "Upscaling", 1);
346 
/* NOTE(review): original line 347 missing — precedes the default-probability resets below; verify upstream */
348  for (i = 0; i < 4; i++)
349  for (j = 0; j < 16; j++)
350  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
351  sizeof(s->prob->token[i][j]));
352  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
353  memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
354  memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
355  memset(&s->segmentation, 0, sizeof(s->segmentation));
356  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
357  }
358 
359  ff_vp56_init_range_decoder(c, buf, header_size);
360  buf += header_size;
361  buf_size -= header_size;
362 
363  if (s->keyframe) {
364  if (vp8_rac_get(c))
365  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
366  vp8_rac_get(c); // whether we can skip clamping in dsp functions
367  }
368 
369  if ((s->segmentation.enabled = vp8_rac_get(c)))
/* NOTE(review): original line 370 missing — presumably calls the segmentation header parser; verify upstream */
371  else
372  s->segmentation.update_map = 0; // FIXME: move this to some init function?
373 
374  s->filter.simple = vp8_rac_get(c);
375  s->filter.level = vp8_rac_get_uint(c, 6);
376  s->filter.sharpness = vp8_rac_get_uint(c, 3);
377 
378  if ((s->lf_delta.enabled = vp8_rac_get(c)))
379  if (vp8_rac_get(c))
380  update_lf_deltas(s);
381 
382  if (setup_partitions(s, buf, buf_size)) {
383  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
384  return AVERROR_INVALIDDATA;
385  }
386 
387  if (!s->macroblocks_base || /* first frame */
388  width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
389  if ((ret = update_dimensions(s, width, height)) < 0)
390  return ret;
391  }
392 
393  get_quants(s);
394 
395  if (!s->keyframe) {
396  update_refs(s);
/* NOTE(review): original line 397 missing — presumably reads sign_bias for VP56_FRAME_GOLDEN; verify upstream */
398  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
399  }
400 
401  // if we aren't saving this frame's probabilities for future frames,
402  // make a copy of the current probabilities
403  if (!(s->update_probabilities = vp8_rac_get(c)))
404  s->prob[1] = s->prob[0];
405 
406  s->update_last = s->keyframe || vp8_rac_get(c);
407 
408  for (i = 0; i < 4; i++)
409  for (j = 0; j < 8; j++)
410  for (k = 0; k < 3; k++)
411  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
/* NOTE(review): original line 412 missing — presumably the update-probability branch guarding the read below; verify upstream */
413  int prob = vp8_rac_get_uint(c, 8);
414  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
415  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
416  }
417 
418  if ((s->mbskip_enabled = vp8_rac_get(c)))
419  s->prob->mbskip = vp8_rac_get_uint(c, 8);
420 
421  if (!s->keyframe) {
422  s->prob->intra = vp8_rac_get_uint(c, 8);
423  s->prob->last = vp8_rac_get_uint(c, 8);
424  s->prob->golden = vp8_rac_get_uint(c, 8);
425 
426  if (vp8_rac_get(c))
427  for (i = 0; i < 4; i++)
428  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
429  if (vp8_rac_get(c))
430  for (i = 0; i < 3; i++)
431  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
432 
433  // 17.2 MV probability update
434  for (i = 0; i < 2; i++)
435  for (j = 0; j < 19; j++)
/* NOTE(review): original line 436 missing — presumably the per-component MV update-probability branch; verify upstream */
437  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
438  }
439 
440  return 0;
441 }
442 
443 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
444 {
445  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
446  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
447 }
448 
449 /**
450  * Motion vector coding, 17.1.
451  */
/* NOTE(review): the signature line (original line 452) is missing from this
 * scraped listing — presumably
 * static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
 * given the calls below; verify upstream. */
453 {
454  int bit, x = 0;
455 
456  if (vp56_rac_get_prob_branchy(c, p[0])) {
457  int i;
458 
/* Large MV: magnitude coded bitwise, low 3 bits ascending then high bits descending */
459  for (i = 0; i < 3; i++)
460  x += vp56_rac_get_prob(c, p[9 + i]) << i;
461  for (i = 9; i > 3; i--)
462  x += vp56_rac_get_prob(c, p[9 + i]) << i;
463  if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
464  x += 8;
465  } else {
466  // small_mvtree
467  const uint8_t *ps = p+2;
468  bit = vp56_rac_get_prob(c, *ps);
469  ps += 1 + 3*bit;
470  x += 4*bit;
471  bit = vp56_rac_get_prob(c, *ps);
472  ps += 1 + bit;
473  x += 2*bit;
474  x += vp56_rac_get_prob(c, *ps);
475  }
476 
/* Sign bit p[1] is only read for non-zero magnitudes */
477  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
478 }
479 
480 static av_always_inline
481 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
482 {
483  if (left == top)
484  return vp8_submv_prob[4-!!left];
485  if (!top)
486  return vp8_submv_prob[2];
487  return vp8_submv_prob[1-!!left];
488 }
489 
490 /**
491  * Split motion vector prediction, 16.4.
492  * @returns the number of motion vectors parsed (2, 4 or 16)
493  */
494 static av_always_inline
/* NOTE(review): the signature line (original line 495) is missing from this
 * scraped listing — presumably
 * int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
 * given the call site in decode_mvs; verify upstream. */
496 {
497  int part_idx;
498  int n, num;
499  VP8Macroblock *top_mb;
500  VP8Macroblock *left_mb = &mb[-1];
501  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
502  *mbsplits_top,
503  *mbsplits_cur, *firstidx;
504  VP56mv *top_mv;
505  VP56mv *left_mv = left_mb->bmv;
506  VP56mv *cur_mv = mb->bmv;
507 
508  if (!layout) // layout is inlined, s->mb_layout is not
509  top_mb = &mb[2];
510  else
511  top_mb = &mb[-s->mb_width-1];
512  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
513  top_mv = top_mb->bmv;
514 
/* NOTE(review): original lines 515-517 missing — the branches reading the
 * partitioning tree (selecting 16x8 / 8x16 before the 8x8 / 4x4 fallbacks
 * below); verify upstream. */
518  } else {
519  part_idx = VP8_SPLITMVMODE_8x8;
520  }
521  } else {
522  part_idx = VP8_SPLITMVMODE_4x4;
523  }
524 
525  num = vp8_mbsplit_count[part_idx];
526  mbsplits_cur = vp8_mbsplits[part_idx],
527  firstidx = vp8_mbfirstidx[part_idx];
528  mb->partitioning = part_idx;
529 
530  for (n = 0; n < num; n++) {
531  int k = firstidx[n];
532  uint32_t left, above;
533  const uint8_t *submv_prob;
534 
/* Neighbour sub-MVs: from the left/top MB on block edges, from this MB inside */
535  if (!(k & 3))
536  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
537  else
538  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
539  if (k <= 3)
540  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
541  else
542  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
543 
544  submv_prob = get_submv_prob(left, above);
545 
546  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
547  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
548  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
549  mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
550  mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
551  } else {
552  AV_ZERO32(&mb->bmv[n]);
553  }
554  } else {
555  AV_WN32A(&mb->bmv[n], above);
556  }
557  } else {
558  AV_WN32A(&mb->bmv[n], left);
559  }
560  }
561 
562  return num;
563 }
564 
/* Decode the motion vector(s) for one inter macroblock: gather the
 * top/left/top-left neighbour MVs (with sign-bias correction), count
 * matches to form contexts, then read the MV mode (zero / nearest /
 * near / new / split) and the resulting vector(s).
 * NOTE(review): original line 618 (between the edge checks and the mode
 * decision — presumably initializing mb->partitioning) is missing from
 * this scraped listing; verify upstream. */
565 static av_always_inline
566 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
567 {
568  VP8Macroblock *mb_edge[3] = { 0 /* top */,
569  mb - 1 /* left */,
570  0 /* top-left */ };
571  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
572  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
573  int idx = CNT_ZERO;
574  int cur_sign_bias = s->sign_bias[mb->ref_frame];
575  int8_t *sign_bias = s->sign_bias;
576  VP56mv near_mv[4];
577  uint8_t cnt[4] = { 0 };
578  VP56RangeCoder *c = &s->c;
579 
580  if (!layout) { // layout is inlined (s->mb_layout is not)
581  mb_edge[0] = mb + 2;
582  mb_edge[2] = mb + 1;
583  }
584  else {
585  mb_edge[0] = mb - s->mb_width-1;
586  mb_edge[2] = mb - s->mb_width-2;
587  }
588 
589  AV_ZERO32(&near_mv[0]);
590  AV_ZERO32(&near_mv[1]);
591  AV_ZERO32(&near_mv[2]);
592 
593  /* Process MB on top, left and top-left */
594  #define MV_EDGE_CHECK(n)\
595  {\
596  VP8Macroblock *edge = mb_edge[n];\
597  int edge_ref = edge->ref_frame;\
598  if (edge_ref != VP56_FRAME_CURRENT) {\
599  uint32_t mv = AV_RN32A(&edge->mv);\
600  if (mv) {\
601  if (cur_sign_bias != sign_bias[edge_ref]) {\
602  /* SWAR negate of the values in mv. */\
603  mv = ~mv;\
604  mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
605  }\
606  if (!n || mv != AV_RN32A(&near_mv[idx]))\
607  AV_WN32A(&near_mv[++idx], mv);\
608  cnt[idx] += 1 + (n != 2);\
609  } else\
610  cnt[CNT_ZERO] += 1 + (n != 2);\
611  }\
612  }
613 
614  MV_EDGE_CHECK(0)
615  MV_EDGE_CHECK(1)
616  MV_EDGE_CHECK(2)
617 
/* NOTE(review): original line 618 missing here; verify upstream */
619  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
620  mb->mode = VP8_MVMODE_MV;
621 
622  /* If we have three distinct MVs, merge first and last if they're the same */
623  if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
624  cnt[CNT_NEAREST] += 1;
625 
626  /* Swap near and nearest if necessary */
627  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
628  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
629  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
630  }
631 
632  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
633  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
634 
635  /* Choose the best mv out of 0,0 and the nearest mv */
636  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
637  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
638  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
639  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
640 
641  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
642  mb->mode = VP8_MVMODE_SPLIT;
643  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
644  } else {
645  mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
646  mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
647  mb->bmv[0] = mb->mv;
648  }
649  } else {
650  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
651  mb->bmv[0] = mb->mv;
652  }
653  } else {
654  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
655  mb->bmv[0] = mb->mv;
656  }
657  } else {
658  mb->mode = VP8_MVMODE_ZERO;
659  AV_ZERO32(&mb->mv);
660  mb->bmv[0] = mb->mv;
661  }
662 }
663 
/* Decode the sixteen 4x4 intra prediction modes of an I4x4 macroblock.
 * On keyframes the modes are context-coded from the modes above and to
 * the left; otherwise each mode is read independently.
 * NOTE(review): the main signature line (original line 665 — presumably
 * void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
 * VP8Macroblock *mb, ...) given the call sites) and line 694 (the
 * inter-frame per-mode read) are missing from this scraped listing. */
664 static av_always_inline
666  int mb_x, int keyframe, int layout)
667 {
668  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
669 
670  if (layout == 1) {
671  VP8Macroblock *mb_top = mb - s->mb_width - 1;
672  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
673  }
674  if (keyframe) {
675  int x, y;
676  uint8_t* top;
677  uint8_t* const left = s->intra4x4_pred_mode_left;
678  if (layout == 1)
679  top = mb->intra4x4_pred_mode_top;
680  else
681  top = s->intra4x4_pred_mode_top + 4 * mb_x;
682  for (y = 0; y < 4; y++) {
683  for (x = 0; x < 4; x++) {
684  const uint8_t *ctx;
685  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
686  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
687  left[y] = top[x] = *intra4x4;
688  intra4x4++;
689  }
690  }
691  } else {
692  int i;
693  for (i = 0; i < 16; i++)
/* NOTE(review): original line 694 missing — presumably reads each mode with a fixed probability set; verify upstream */
695  }
696 }
697 
/* Decode the per-macroblock mode information: segment id (from the
 * bitstream or propagated from the previous map), skip flag, and then
 * either the keyframe intra modes, an inter mode with reference frame
 * and motion vectors, or a non-keyframe intra mode.
 * NOTE(review): original lines 714, 727-728, 735, 742 and 746-749
 * (several mode/ref assignments) are missing from this scraped
 * listing — flagged inline; verify upstream. */
698 static av_always_inline
699 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
700  uint8_t *segment, uint8_t *ref, int layout)
701 {
702  VP56RangeCoder *c = &s->c;
703 
704  if (s->segmentation.update_map) {
705  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
706  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
707  } else if (s->segmentation.enabled)
708  *segment = ref ? *ref : *segment;
709  mb->segment = *segment;
710 
711  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
712 
713  if (s->keyframe) {
/* NOTE(review): original line 714 missing — presumably reads mb->mode from the keyframe 16x16 tree; verify upstream */
715 
716  if (mb->mode == MODE_I4x4) {
717  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
718  } else {
719  const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
720  if (s->mb_layout == 1)
721  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
722  else
723  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
724  AV_WN32A( s->intra4x4_pred_mode_left, modes);
725  }
726 
/* NOTE(review): original lines 727-728 missing — presumably chroma mode read and ref_frame assignment; verify upstream */
729  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
730  // inter MB, 16.2
731  if (vp56_rac_get_prob_branchy(c, s->prob->last))
732  mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
733  VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
734  else
/* NOTE(review): original line 735 missing — presumably sets mb->ref_frame to the previous frame; verify upstream */
736  s->ref_count[mb->ref_frame-1]++;
737 
738  // motion vectors, 16.3
739  decode_mvs(s, mb, mb_x, mb_y, layout);
740  } else {
741  // intra MB, 16.1
/* NOTE(review): original line 742 missing — presumably reads mb->mode from the inter-frame 16x16 tree; verify upstream */
743 
744  if (mb->mode == MODE_I4x4)
745  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
746 
/* NOTE(review): original lines 746-749 missing — presumably chroma mode read and ref_frame = CURRENT; verify upstream */
750  AV_ZERO32(&mb->bmv[0]);
751  }
752 }
753 
754 #ifndef decode_block_coeffs_internal
755 /**
756  * @param r arithmetic bitstream reader context
757  * @param block destination for block coefficients
758  * @param probs probabilities to use when reading trees from the bitstream
759  * @param i initial coeff index, 0 unless a separate DC block is coded
760  * @param qmul array holding the dc/ac dequant factor at position 0/1
761  * @return 0 if no coeffs were decoded
762  * otherwise, the index of the last coeff decoded plus one
763  */
/* NOTE(review): the signature line (original line 764 — presumably
 * static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16], ...)
 * matching the call in decode_block_coeffs) is missing from this
 * scraped listing; verify upstream. */
765  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
766  int i, uint8_t *token_prob, int16_t qmul[2])
767 {
/* Work on a local copy of the coder; written back once at the end */
768  VP56RangeCoder c = *r;
769  goto skip_eob;
770  do {
771  int coeff;
772  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
773  break;
774 
775 skip_eob:
776  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
777  if (++i == 16)
778  break; // invalid input; blocks should end with EOB
779  token_prob = probs[i][0];
780  goto skip_eob;
781  }
782 
783  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
784  coeff = 1;
785  token_prob = probs[i+1][1];
786  } else {
787  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
788  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
789  if (coeff)
790  coeff += vp56_rac_get_prob(&c, token_prob[5]);
791  coeff += 2;
792  } else {
793  // DCT_CAT*
794  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
795  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
796  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
797  } else { // DCT_CAT2
798  coeff = 7;
799  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
800  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
801  }
802  } else { // DCT_CAT3 and up
803  int a = vp56_rac_get_prob(&c, token_prob[8]);
804  int b = vp56_rac_get_prob(&c, token_prob[9+a]);
805  int cat = (a<<1) + b;
806  coeff = 3 + (8<<cat);
807  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
808  }
809  }
810  token_prob = probs[i+1][2];
811  }
/* Sign bit, then dequantize: index 0 is DC (qmul[0]), the rest AC (qmul[1]) */
812  block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
813  } while (++i < 16);
814 
815  *r = c;
816  return i;
817 }
818 #endif
819 
820 /**
821  * @param c arithmetic bitstream reader context
822  * @param block destination for block coefficients
823  * @param probs probabilities to use when reading trees from the bitstream
824  * @param i initial coeff index, 0 unless a separate DC block is coded
825  * @param zero_nhood the initial prediction context for number of surrounding
826  * all-zero blocks (only left/top, so 0-2)
827  * @param qmul array holding the dc/ac dequant factor at position 0/1
828  * @return 0 if no coeffs were decoded
829  * otherwise, the index of the last coeff decoded plus one
830  */
831 static av_always_inline
/* NOTE(review): the main signature line (original line 832 — presumably
 * int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], ...) given
 * the call sites in decode_mb_coeffs) is missing from this scraped
 * listing; verify upstream. */
833  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
834  int i, int zero_nhood, int16_t qmul[2])
835 {
836  uint8_t *token_prob = probs[i][zero_nhood];
/* Fast path: an immediate EOB means the block is empty */
837  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
838  return 0;
839  return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
840 }
841 
/* Decode all coefficient blocks of one macroblock: the optional
 * separate luma DC (Y2) block with its inverse WHT, then the sixteen
 * luma AC blocks and the eight chroma blocks, maintaining the
 * left/top non-zero-count contexts. Marks the MB as skipped if no
 * coefficients were coded at all.
 * NOTE(review): the main signature line (original line 843 —
 * presumably void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td,
 * VP56RangeCoder *c, VP8Macroblock *mb, ...)) and line 862 (the
 * single-DC-coefficient WHT fast path) are missing from this scraped
 * listing; verify upstream. */
842 static av_always_inline
844  uint8_t t_nnz[9], uint8_t l_nnz[9])
845 {
846  int i, x, y, luma_start = 0, luma_ctx = 3;
847  int nnz_pred, nnz, nnz_total = 0;
848  int segment = mb->segment;
849  int block_dc = 0;
850 
851  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
852  nnz_pred = t_nnz[8] + l_nnz[8];
853 
854  // decode DC values and do hadamard
855  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
856  s->qmat[segment].luma_dc_qmul);
857  l_nnz[8] = t_nnz[8] = !!nnz;
858  if (nnz) {
859  nnz_total += nnz;
860  block_dc = 1;
861  if (nnz == 1)
/* NOTE(review): original line 862 missing — presumably the DC-only WHT variant; verify upstream */
863  else
864  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
865  }
866  luma_start = 1;
867  luma_ctx = 0;
868  }
869 
870  // luma blocks
871  for (y = 0; y < 4; y++)
872  for (x = 0; x < 4; x++) {
873  nnz_pred = l_nnz[y] + t_nnz[x];
874  nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
875  nnz_pred, s->qmat[segment].luma_qmul);
876  // nnz+block_dc may be one more than the actual last index, but we don't care
877  td->non_zero_count_cache[y][x] = nnz + block_dc;
878  t_nnz[x] = l_nnz[y] = !!nnz;
879  nnz_total += nnz;
880  }
881 
882  // chroma blocks
883  // TODO: what to do about dimensions? 2nd dim for luma is x,
884  // but for chroma it's (y<<1)|x
885  for (i = 4; i < 6; i++)
886  for (y = 0; y < 2; y++)
887  for (x = 0; x < 2; x++) {
888  nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
889  nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
890  nnz_pred, s->qmat[segment].chroma_qmul);
891  td->non_zero_count_cache[i][(y<<1)+x] = nnz;
892  t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
893  nnz_total += nnz;
894  }
895 
896  // if there were no coded coeffs despite the macroblock not being marked skip,
897  // we MUST not do the inner loop filter and should not do IDCT
898  // Since skip isn't used for bitstream prediction, just manually set it.
899  if (!nnz_total)
900  mb->skip = 1;
901 }
902 
903 static av_always_inline
904 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
905  int linesize, int uvlinesize, int simple)
906 {
907  AV_COPY128(top_border, src_y + 15*linesize);
908  if (!simple) {
909  AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
910  AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
911  }
912 }
913 
914 static av_always_inline
915 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
916  int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
917  int simple, int xchg)
918 {
919  uint8_t *top_border_m1 = top_border-32; // for TL prediction
920  src_y -= linesize;
921  src_cb -= uvlinesize;
922  src_cr -= uvlinesize;
923 
924 #define XCHG(a,b,xchg) do { \
925  if (xchg) AV_SWAP64(b,a); \
926  else AV_COPY64(b,a); \
927  } while (0)
928 
929  XCHG(top_border_m1+8, src_y-8, xchg);
930  XCHG(top_border, src_y, xchg);
931  XCHG(top_border+8, src_y+8, 1);
932  if (mb_x < mb_width-1)
933  XCHG(top_border+32, src_y+16, 1);
934 
935  // only copy chroma for normal loop filter
936  // or to initialize the top row to 127
937  if (!simple || !mb_y) {
938  XCHG(top_border_m1+16, src_cb-8, xchg);
939  XCHG(top_border_m1+24, src_cr-8, xchg);
940  XCHG(top_border+16, src_cb, 1);
941  XCHG(top_border+24, src_cr, 1);
942  }
943 }
944 
945 static av_always_inline
946 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
947 {
948  if (!mb_x) {
949  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
950  } else {
951  return mb_y ? mode : LEFT_DC_PRED8x8;
952  }
953 }
954 
955 static av_always_inline
956 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
957 {
958  if (!mb_x) {
959  return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
960  } else {
961  return mb_y ? mode : HOR_PRED8x8;
962  }
963 }
964 
965 static av_always_inline
966 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
967 {
968  if (mode == DC_PRED8x8) {
969  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
970  } else {
971  return mode;
972  }
973 }
974 
975 static av_always_inline
976 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
977 {
978  switch (mode) {
979  case DC_PRED8x8:
980  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
981  case VERT_PRED8x8:
982  return !mb_y ? DC_127_PRED8x8 : mode;
983  case HOR_PRED8x8:
984  return !mb_x ? DC_129_PRED8x8 : mode;
985  case PLANE_PRED8x8 /*TM*/:
986  return check_tm_pred8x8_mode(mode, mb_x, mb_y);
987  }
988  return mode;
989 }
990 
991 static av_always_inline
992 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
993 {
994  if (!mb_x) {
995  return mb_y ? VERT_VP8_PRED : DC_129_PRED;
996  } else {
997  return mb_y ? mode : HOR_VP8_PRED;
998  }
999 }
1000 
1001 static av_always_inline
1002 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1003 {
1004  switch (mode) {
1005  case VERT_PRED:
1006  if (!mb_x && mb_y) {
1007  *copy_buf = 1;
1008  return mode;
1009  }
1010  /* fall-through */
1011  case DIAG_DOWN_LEFT_PRED:
1012  case VERT_LEFT_PRED:
1013  return !mb_y ? DC_127_PRED : mode;
1014  case HOR_PRED:
1015  if (!mb_y) {
1016  *copy_buf = 1;
1017  return mode;
1018  }
1019  /* fall-through */
1020  case HOR_UP_PRED:
1021  return !mb_x ? DC_129_PRED : mode;
1022  case TM_VP8_PRED:
1023  return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1024  case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1025  case DIAG_DOWN_RIGHT_PRED:
1026  case VERT_RIGHT_PRED:
1027  case HOR_DOWN_PRED:
1028  if (!mb_y || !mb_x)
1029  *copy_buf = 1;
1030  return mode;
1031  }
1032  return mode;
1033 }
1034 
1035 static av_always_inline
1037  VP8Macroblock *mb, int mb_x, int mb_y)
1038 {
1039  AVCodecContext *avctx = s->avctx;
1040  int x, y, mode, nnz;
1041  uint32_t tr;
1042 
1043  // for the first row, we need to run xchg_mb_border to init the top edge to 127
1044  // otherwise, skip it if we aren't going to deblock
1045  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1046  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1047  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1048  s->filter.simple, 1);
1049 
1050  if (mb->mode < MODE_I4x4) {
1051  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1052  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1053  } else {
1054  mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1055  }
1056  s->hpc.pred16x16[mode](dst[0], s->linesize);
1057  } else {
1058  uint8_t *ptr = dst[0];
1059  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1060  uint8_t tr_top[4] = { 127, 127, 127, 127 };
1061 
1062  // all blocks on the right edge of the macroblock use bottom edge
1063  // the top macroblock for their topright edge
1064  uint8_t *tr_right = ptr - s->linesize + 16;
1065 
1066  // if we're on the right edge of the frame, said edge is extended
1067  // from the top macroblock
1068  if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1069  mb_x == s->mb_width-1) {
1070  tr = tr_right[-1]*0x01010101u;
1071  tr_right = (uint8_t *)&tr;
1072  }
1073 
1074  if (mb->skip)
1076 
1077  for (y = 0; y < 4; y++) {
1078  uint8_t *topright = ptr + 4 - s->linesize;
1079  for (x = 0; x < 4; x++) {
1080  int copy = 0, linesize = s->linesize;
1081  uint8_t *dst = ptr+4*x;
1082  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1083 
1084  if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1085  topright = tr_top;
1086  } else if (x == 3)
1087  topright = tr_right;
1088 
1089  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1090  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1091  if (copy) {
1092  dst = copy_dst + 12;
1093  linesize = 8;
1094  if (!(mb_y + y)) {
1095  copy_dst[3] = 127U;
1096  AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1097  } else {
1098  AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1099  if (!(mb_x + x)) {
1100  copy_dst[3] = 129U;
1101  } else {
1102  copy_dst[3] = ptr[4*x-s->linesize-1];
1103  }
1104  }
1105  if (!(mb_x + x)) {
1106  copy_dst[11] =
1107  copy_dst[19] =
1108  copy_dst[27] =
1109  copy_dst[35] = 129U;
1110  } else {
1111  copy_dst[11] = ptr[4*x -1];
1112  copy_dst[19] = ptr[4*x+s->linesize -1];
1113  copy_dst[27] = ptr[4*x+s->linesize*2-1];
1114  copy_dst[35] = ptr[4*x+s->linesize*3-1];
1115  }
1116  }
1117  } else {
1118  mode = intra4x4[x];
1119  }
1120  s->hpc.pred4x4[mode](dst, topright, linesize);
1121  if (copy) {
1122  AV_COPY32(ptr+4*x , copy_dst+12);
1123  AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1124  AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1125  AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1126  }
1127 
1128  nnz = td->non_zero_count_cache[y][x];
1129  if (nnz) {
1130  if (nnz == 1)
1131  s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1132  else
1133  s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1134  }
1135  topright += 4;
1136  }
1137 
1138  ptr += 4*s->linesize;
1139  intra4x4 += 4;
1140  }
1141  }
1142 
1143  if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1144  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1145  } else {
1146  mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1147  }
1148  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1149  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1150 
1151  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1152  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1153  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1154  s->filter.simple, 0);
1155 }
1156 
/* Per-subpel-position motion-compensation parameters, indexed first by
 * row (meaning below) and then by the 3-bit fractional MV component. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1163 
1164 /**
1165  * luma MC function
1166  *
1167  * @param s VP8 decoding context
1168  * @param dst target buffer for block data at block position
1169  * @param ref reference picture buffer at origin (0, 0)
1170  * @param mv motion vector (relative to block position) to get pixel data from
1171  * @param x_off horizontal position of block from origin (0, 0)
1172  * @param y_off vertical position of block from origin (0, 0)
1173  * @param block_w width of block (16, 8 or 4)
1174  * @param block_h height of block (always same as block_w)
1175  * @param width width of src/dst plane data
1176  * @param height height of src/dst plane data
1177  * @param linesize size of a single line of plane data, including padding
1178  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1179  */
1180 static av_always_inline
1182  AVFrame *ref, const VP56mv *mv,
1183  int x_off, int y_off, int block_w, int block_h,
1184  int width, int height, int linesize,
1185  vp8_mc_func mc_func[3][3])
1186 {
1187  uint8_t *src = ref->data[0];
1188 
1189  if (AV_RN32A(mv)) {
1190 
1191  int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1192  int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1193 
1194  x_off += mv->x >> 2;
1195  y_off += mv->y >> 2;
1196 
1197  // edge emulation
1198  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1199  src += y_off * linesize + x_off;
1200  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1201  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1202  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1203  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1204  x_off - mx_idx, y_off - my_idx, width, height);
1205  src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1206  }
1207  mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1208  } else {
1209  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1210  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1211  }
1212 }
1213 
1214 /**
1215  * chroma MC function
1216  *
1217  * @param s VP8 decoding context
1218  * @param dst1 target buffer for block data at block position (U plane)
1219  * @param dst2 target buffer for block data at block position (V plane)
1220  * @param ref reference picture buffer at origin (0, 0)
1221  * @param mv motion vector (relative to block position) to get pixel data from
1222  * @param x_off horizontal position of block from origin (0, 0)
1223  * @param y_off vertical position of block from origin (0, 0)
1224  * @param block_w width of block (16, 8 or 4)
1225  * @param block_h height of block (always same as block_w)
1226  * @param width width of src/dst plane data
1227  * @param height height of src/dst plane data
1228  * @param linesize size of a single line of plane data, including padding
1229  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1230  */
1231 static av_always_inline
1233  AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1234  int block_w, int block_h, int width, int height, int linesize,
1235  vp8_mc_func mc_func[3][3])
1236 {
1237  uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1238 
1239  if (AV_RN32A(mv)) {
1240  int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1241  int my = mv->y&7, my_idx = subpel_idx[0][my];
1242 
1243  x_off += mv->x >> 3;
1244  y_off += mv->y >> 3;
1245 
1246  // edge emulation
1247  src1 += y_off * linesize + x_off;
1248  src2 += y_off * linesize + x_off;
1249  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1250  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1251  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1252  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1253  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1254  x_off - mx_idx, y_off - my_idx, width, height);
1255  src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1256  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1257 
1258  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1259  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1260  x_off - mx_idx, y_off - my_idx, width, height);
1261  src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1262  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1263  } else {
1264  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1265  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1266  }
1267  } else {
1268  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1269  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1270  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1271  }
1272 }
1273 
1274 static av_always_inline
1276  AVFrame *ref_frame, int x_off, int y_off,
1277  int bx_off, int by_off,
1278  int block_w, int block_h,
1279  int width, int height, VP56mv *mv)
1280 {
1281  VP56mv uvmv = *mv;
1282 
1283  /* Y */
1284  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1285  ref_frame, mv, x_off + bx_off, y_off + by_off,
1286  block_w, block_h, width, height, s->linesize,
1287  s->put_pixels_tab[block_w == 8]);
1288 
1289  /* U/V */
1290  if (s->profile == 3) {
1291  uvmv.x &= ~7;
1292  uvmv.y &= ~7;
1293  }
1294  x_off >>= 1; y_off >>= 1;
1295  bx_off >>= 1; by_off >>= 1;
1296  width >>= 1; height >>= 1;
1297  block_w >>= 1; block_h >>= 1;
1298  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1299  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1300  &uvmv, x_off + bx_off, y_off + by_off,
1301  block_w, block_h, width, height, s->uvlinesize,
1302  s->put_pixels_tab[1 + (block_w == 4)]);
1303 }
1304 
1305 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1306  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1307 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1308 {
1309  /* Don't prefetch refs that haven't been used very often this frame. */
1310  if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1311  int x_off = mb_x << 4, y_off = mb_y << 4;
1312  int mx = (mb->mv.x>>2) + x_off + 8;
1313  int my = (mb->mv.y>>2) + y_off;
1314  uint8_t **src= s->framep[ref]->data;
1315  int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1316  /* For threading, a ff_thread_await_progress here might be useful, but
1317  * it actually slows down the decoder. Since a bad prefetch doesn't
1318  * generate bad decoder output, we don't run it here. */
1319  s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1320  off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1321  s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1322  }
1323 }
1324 
1325 /**
1326  * Apply motion vectors to prediction buffer, chapter 18.
1327  */
1328 static av_always_inline
1330  VP8Macroblock *mb, int mb_x, int mb_y)
1331 {
1332  int x_off = mb_x << 4, y_off = mb_y << 4;
1333  int width = 16*s->mb_width, height = 16*s->mb_height;
1334  AVFrame *ref = s->framep[mb->ref_frame];
1335  VP56mv *bmv = mb->bmv;
1336 
1337  switch (mb->partitioning) {
1338  case VP8_SPLITMVMODE_NONE:
1339  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1340  0, 0, 16, 16, width, height, &mb->mv);
1341  break;
1342  case VP8_SPLITMVMODE_4x4: {
1343  int x, y;
1344  VP56mv uvmv;
1345 
1346  /* Y */
1347  for (y = 0; y < 4; y++) {
1348  for (x = 0; x < 4; x++) {
1349  vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1350  ref, &bmv[4*y + x],
1351  4*x + x_off, 4*y + y_off, 4, 4,
1352  width, height, s->linesize,
1353  s->put_pixels_tab[2]);
1354  }
1355  }
1356 
1357  /* U/V */
1358  x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1359  for (y = 0; y < 2; y++) {
1360  for (x = 0; x < 2; x++) {
1361  uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1362  mb->bmv[ 2*y * 4 + 2*x+1].x +
1363  mb->bmv[(2*y+1) * 4 + 2*x ].x +
1364  mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1365  uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1366  mb->bmv[ 2*y * 4 + 2*x+1].y +
1367  mb->bmv[(2*y+1) * 4 + 2*x ].y +
1368  mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1369  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1370  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1371  if (s->profile == 3) {
1372  uvmv.x &= ~7;
1373  uvmv.y &= ~7;
1374  }
1375  vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1376  dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1377  4*x + x_off, 4*y + y_off, 4, 4,
1378  width, height, s->uvlinesize,
1379  s->put_pixels_tab[2]);
1380  }
1381  }
1382  break;
1383  }
1384  case VP8_SPLITMVMODE_16x8:
1385  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386  0, 0, 16, 8, width, height, &bmv[0]);
1387  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1388  0, 8, 16, 8, width, height, &bmv[1]);
1389  break;
1390  case VP8_SPLITMVMODE_8x16:
1391  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392  0, 0, 8, 16, width, height, &bmv[0]);
1393  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1394  8, 0, 8, 16, width, height, &bmv[1]);
1395  break;
1396  case VP8_SPLITMVMODE_8x8:
1397  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398  0, 0, 8, 8, width, height, &bmv[0]);
1399  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1400  8, 0, 8, 8, width, height, &bmv[1]);
1401  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402  0, 8, 8, 8, width, height, &bmv[2]);
1403  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1404  8, 8, 8, 8, width, height, &bmv[3]);
1405  break;
1406  }
1407 }
1408 
1410  uint8_t *dst[3], VP8Macroblock *mb)
1411 {
1412  int x, y, ch;
1413 
1414  if (mb->mode != MODE_I4x4) {
1415  uint8_t *y_dst = dst[0];
1416  for (y = 0; y < 4; y++) {
1417  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1418  if (nnz4) {
1419  if (nnz4&~0x01010101) {
1420  for (x = 0; x < 4; x++) {
1421  if ((uint8_t)nnz4 == 1)
1422  s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1423  else if((uint8_t)nnz4 > 1)
1424  s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1425  nnz4 >>= 8;
1426  if (!nnz4)
1427  break;
1428  }
1429  } else {
1430  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1431  }
1432  }
1433  y_dst += 4*s->linesize;
1434  }
1435  }
1436 
1437  for (ch = 0; ch < 2; ch++) {
1438  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1439  if (nnz4) {
1440  uint8_t *ch_dst = dst[1+ch];
1441  if (nnz4&~0x01010101) {
1442  for (y = 0; y < 2; y++) {
1443  for (x = 0; x < 2; x++) {
1444  if ((uint8_t)nnz4 == 1)
1445  s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1446  else if((uint8_t)nnz4 > 1)
1447  s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1448  nnz4 >>= 8;
1449  if (!nnz4)
1450  goto chroma_idct_end;
1451  }
1452  ch_dst += 4*s->uvlinesize;
1453  }
1454  } else {
1455  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1456  }
1457  }
1458 chroma_idct_end: ;
1459  }
1460 }
1461 
1463 {
1464  int interior_limit, filter_level;
1465 
1466  if (s->segmentation.enabled) {
1467  filter_level = s->segmentation.filter_level[mb->segment];
1468  if (!s->segmentation.absolute_vals)
1469  filter_level += s->filter.level;
1470  } else
1471  filter_level = s->filter.level;
1472 
1473  if (s->lf_delta.enabled) {
1474  filter_level += s->lf_delta.ref[mb->ref_frame];
1475  filter_level += s->lf_delta.mode[mb->mode];
1476  }
1477 
1478  filter_level = av_clip_uintp2(filter_level, 6);
1479 
1480  interior_limit = filter_level;
1481  if (s->filter.sharpness) {
1482  interior_limit >>= (s->filter.sharpness + 3) >> 2;
1483  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1484  }
1485  interior_limit = FFMAX(interior_limit, 1);
1486 
1487  f->filter_level = filter_level;
1488  f->inner_limit = interior_limit;
1489  f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1490 }
1491 
/**
 * Apply the normal (non-simple) in-loop deblocking filter to one
 * reconstructed macroblock, in place, on all three planes.
 * The call order below is the filter order: vertical edges (h_loop_filter)
 * before horizontal edges (v_loop_filter), and the macroblock-edge filter
 * before the inner block-edge filters.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    // level 0 means filtering is disabled for this macroblock
    if (!filter_level)
        return;

    bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;  // macroblock edges get a looser limit

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (skipped on the leftmost column)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner vertical edges: luma at x = 4/8/12, chroma at x = 4
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (skipped on the top row)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner horizontal edges: luma at y = 4/8/12, chroma at y = 4
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1561 
1563 {
1564  int mbedge_lim, bedge_lim;
1565  int filter_level = f->filter_level;
1566  int inner_limit = f->inner_limit;
1567  int inner_filter = f->inner_filter;
1568  int linesize = s->linesize;
1569 
1570  if (!filter_level)
1571  return;
1572 
1573  bedge_lim = 2*filter_level + inner_limit;
1574  mbedge_lim = bedge_lim + 4;
1575 
1576  if (mb_x)
1577  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1578  if (inner_filter) {
1579  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1580  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1581  s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1582  }
1583 
1584  if (mb_y)
1585  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1586  if (inner_filter) {
1587  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1588  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1589  s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1590  }
1591 }
1592 
1593 static void release_queued_segmaps(VP8Context *s, int is_close)
1594 {
1595  int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1596  while (s->num_maps_to_be_freed > leave_behind)
1598  s->maps_are_invalid = 0;
1599 }
1600 
1601 #define MARGIN (16 << 2)
1602 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1603  AVFrame *prev_frame)
1604 {
1605  VP8Context *s = avctx->priv_data;
1606  int mb_x, mb_y;
1607 
1608  s->mv_min.y = -MARGIN;
1609  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1610  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1611  VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1612  int mb_xy = mb_y*s->mb_width;
1613 
1614  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1615 
1616  s->mv_min.x = -MARGIN;
1617  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1618  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1619  if (mb_y == 0)
1620  AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1621  decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1622  prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
1623  s->mv_min.x -= 64;
1624  s->mv_max.x -= 64;
1625  }
1626  s->mv_min.y -= 64;
1627  s->mv_max.y -= 64;
1628  }
1629 }
1630 
#if HAVE_THREADS
/* Block until thread `otd` has decoded past macroblock (mb_x_check,
 * mb_y_check). Progress positions are packed as (mb_y << 16) | mb_x so a
 * single integer comparison orders them; the wait loop re-checks under
 * otd's lock to avoid missed wakeups. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

/* Publish this thread's decode position and wake neighbouring threads that
 * may be waiting on it. Relies on `avctx`, `num_jobs`, `next_td` and
 * `prev_td` being in scope at the expansion site; only broadcasts when
 * slice threading is active and a neighbour is actually waiting. */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null = (next_td == NULL) || (prev_td == NULL);\
        int pos_check = (is_null) ? 1 :\
                        (next_td != td && pos >= next_td->wait_mb_pos) ||\
                        (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\
        }\
    } while(0);
#else
/* no-op stubs when built without thread support */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
1667 
/**
 * Decode one macroblock row (mode parsing, coefficient decoding, intra or
 * inter prediction and the IDCT), without loop filtering. The row to work
 * on is taken from td->thread_mb_pos; synchronization with neighbouring
 * slice threads is done through check_thread_pos()/update_pos().
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    // coefficient partitions are assigned to rows round-robin
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] + 8*mb_y*s->uvlinesize,
        curframe->data[2] + 8*mb_y*s->uvlinesize
    };
    // neighbouring thread data for the rows above/below (self at frame edges)
    if (mb_y == 0) prev_td = td;
    else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)  // 16 rows for luma, 8 for chroma
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                // thread 0's predecessor also ran the filter pass, whose
                // positions are offset by s->mb_width+3
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        // with the interleaved layout, modes are parsed here rather than
        // up front in vp8_decode_mv_mb_modes()
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        // last slice thread saves the row border for the filter pass
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
1777 
/**
 * Loop-filter one macroblock row (taken from td->thread_mb_pos), using the
 * per-mb strengths computed in vp8_decode_mb_row_no_filter(). Filter-pass
 * progress positions are offset by s->mb_width+3 so they sort after the
 * decode pass of the same row in check_thread_pos()/update_pos().
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] + 8*mb_y*s->uvlinesize,
        curframe->data[2] + 8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    // neighbouring thread data for the rows above/below (self at frame edges)
    if (mb_y == 0) prev_td = td;
    else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        // wait until the row above has been filtered past mb_x
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        // wait until the row below has been decoded past mb_x, since the
        // filter writes into its top pixels
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        // single-job case: the border wasn't saved during the decode pass
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
1831 
1832 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1833  int jobnr, int threadnr)
1834 {
1835  VP8Context *s = avctx->priv_data;
1836  VP8ThreadData *td = &s->thread_data[jobnr];
1837  VP8ThreadData *next_td = NULL, *prev_td = NULL;
1838  AVFrame *curframe = s->curframe;
1839  int mb_y, num_jobs = s->num_jobs;
1840  td->thread_nr = threadnr;
1841  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1842  if (mb_y >= s->mb_height) break;
1843  td->thread_mb_pos = mb_y<<16;
1844  vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1845  if (s->deblock_filter)
1846  vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1847  update_pos(td, mb_y, INT_MAX & 0xFFFF);
1848 
1849  s->mv_min.y -= 64;
1850  s->mv_max.y -= 64;
1851 
1852  if (avctx->active_thread_type == FF_THREAD_FRAME)
1853  ff_thread_report_progress(curframe, mb_y, 0);
1854  }
1855 
1856  return 0;
1857 }
1858 
1859 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1860  AVPacket *avpkt)
1861 {
1862  VP8Context *s = avctx->priv_data;
1863  int ret, i, referenced, num_jobs;
1864  enum AVDiscard skip_thresh;
1865  AVFrame *av_uninit(curframe), *prev_frame;
1866 
1867  release_queued_segmaps(s, 0);
1868 
1869  if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1870  goto err;
1871 
1872  prev_frame = s->framep[VP56_FRAME_CURRENT];
1873 
1874  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1876 
1877  skip_thresh = !referenced ? AVDISCARD_NONREF :
1879 
1880  if (avctx->skip_frame >= skip_thresh) {
1881  s->invisible = 1;
1882  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1883  goto skip_decode;
1884  }
1885  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1886 
1887  // release no longer referenced frames
1888  for (i = 0; i < 5; i++)
1889  if (s->frames[i].data[0] &&
1890  &s->frames[i] != prev_frame &&
1891  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1892  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1893  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1894  vp8_release_frame(s, &s->frames[i], 1, 0);
1895 
1896  // find a free buffer
1897  for (i = 0; i < 5; i++)
1898  if (&s->frames[i] != prev_frame &&
1899  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1900  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1901  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1902  curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1903  break;
1904  }
1905  if (i == 5) {
1906  av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1907  abort();
1908  }
1909  if (curframe->data[0])
1910  vp8_release_frame(s, curframe, 1, 0);
1911 
1912  // Given that arithmetic probabilities are updated every frame, it's quite likely
1913  // that the values we have on a random interframe are complete junk if we didn't
1914  // start decode on a keyframe. So just don't display anything rather than junk.
1915  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1916  !s->framep[VP56_FRAME_GOLDEN] ||
1917  !s->framep[VP56_FRAME_GOLDEN2])) {
1918  av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1919  ret = AVERROR_INVALIDDATA;
1920  goto err;
1921  }
1922 
1923  curframe->key_frame = s->keyframe;
1924  curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1925  curframe->reference = referenced ? 3 : 0;
1926  if ((ret = vp8_alloc_frame(s, curframe))) {
1927  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1928  goto err;
1929  }
1930 
1931  // check if golden and altref are swapped
1932  if (s->update_altref != VP56_FRAME_NONE) {
1934  } else {
1936  }
1937  if (s->update_golden != VP56_FRAME_NONE) {
1939  } else {
1941  }
1942  if (s->update_last) {
1943  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1944  } else {
1946  }
1947  s->next_framep[VP56_FRAME_CURRENT] = curframe;
1948 
1949  ff_thread_finish_setup(avctx);
1950 
1951  s->linesize = curframe->linesize[0];
1952  s->uvlinesize = curframe->linesize[1];
1953 
1954  if (!s->thread_data[0].edge_emu_buffer)
1955  for (i = 0; i < MAX_THREADS; i++)
1957 
1958  memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1959  /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1960  if (!s->mb_layout)
1961  memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1962  if (!s->mb_layout && s->keyframe)
1963  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1964 
1965  // top edge of 127 for intra prediction
1966  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1967  s->top_border[0][15] = s->top_border[0][23] = 127;
1968  s->top_border[0][31] = 127;
1969  memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1970  }
1971  memset(s->ref_count, 0, sizeof(s->ref_count));
1972 
1973 
1974  // Make sure the previous frame has read its segmentation map,
1975  // if we re-use the same map.
1976  if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1977  ff_thread_await_progress(prev_frame, 1, 0);
1978 
1979  if (s->mb_layout == 1)
1980  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1981 
1982  if (avctx->active_thread_type == FF_THREAD_FRAME)
1983  num_jobs = 1;
1984  else
1985  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1986  s->num_jobs = num_jobs;
1987  s->curframe = curframe;
1988  s->prev_frame = prev_frame;
1989  s->mv_min.y = -MARGIN;
1990  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1991  for (i = 0; i < MAX_THREADS; i++) {
1992  s->thread_data[i].thread_mb_pos = 0;
1993  s->thread_data[i].wait_mb_pos = INT_MAX;
1994  }
1995  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1996 
1997  ff_thread_report_progress(curframe, INT_MAX, 0);
1998  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1999 
2000 skip_decode:
2001  // if future frames don't use the updated probabilities,
2002  // reset them to the values we saved
2003  if (!s->update_probabilities)
2004  s->prob[0] = s->prob[1];
2005 
2006  if (!s->invisible) {
2007  *(AVFrame*)data = *curframe;
2008  *got_frame = 1;
2009  }
2010 
2011  return avpkt->size;
2012 err:
2013  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2014  return ret;
2015 }
2016 
2018 {
2019  VP8Context *s = avctx->priv_data;
2020 
2021  s->avctx = avctx;
2022  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2023 
2024  ff_videodsp_init(&s->vdsp, 8);
2026  ff_vp8dsp_init(&s->vp8dsp);
2027 
2028  return 0;
2029 }
2030 
2032 {
2033  vp8_decode_flush_impl(avctx, 0, 1, 1);
2034  release_queued_segmaps(avctx->priv_data, 1);
2035  return 0;
2036 }
2037 
2039 {
2040  VP8Context *s = avctx->priv_data;
2041 
2042  s->avctx = avctx;
2043 
2044  return 0;
2045 }
2046 
/**
 * Translate a frame pointer belonging to the source thread context into
 * the corresponding slot of this context's frames[] array (both contexts
 * keep their frames at identical indices); NULL maps to NULL.
 *
 * Relies on local variables `s` (destination VP8Context) and `s_src`
 * (source VP8Context) being in scope at the expansion site.
 *
 * The argument and the whole expansion are parenthesized so the macro
 * parses correctly inside larger expressions (the bare ?: of the
 * original could bind wrongly next to lower-precedence context).
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2049 
2051 {
2052  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2053 
2054  if (s->macroblocks_base &&
2055  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2056  free_buffers(s);
2057  s->maps_are_invalid = 1;
2058  s->mb_width = s_src->mb_width;
2059  s->mb_height = s_src->mb_height;
2060  }
2061 
2062  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2063  s->segmentation = s_src->segmentation;
2064  s->lf_delta = s_src->lf_delta;
2065  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2066 
2067  memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2068  s->framep[0] = REBASE(s_src->next_framep[0]);
2069  s->framep[1] = REBASE(s_src->next_framep[1]);
2070  s->framep[2] = REBASE(s_src->next_framep[2]);
2071  s->framep[3] = REBASE(s_src->next_framep[3]);
2072 
2073  return 0;
2074 }
2075 
2077  .name = "vp8",
2078  .type = AVMEDIA_TYPE_VIDEO,
2079  .id = AV_CODEC_ID_VP8,
2080  .priv_data_size = sizeof(VP8Context),
2081  .init = vp8_decode_init,
2086  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2089 };