FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vp8.c
Go to the documentation of this file.
1 /*
2  * VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Jason Garrett-Glaser
7  * Copyright (C) 2012 Daniel Kang
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37 
38 static void free_buffers(VP8Context *s)
39 {
40  int i;
41  if (s->thread_data)
42  for (i = 0; i < MAX_THREADS; i++) {
43 #if HAVE_THREADS
44  pthread_cond_destroy(&s->thread_data[i].cond);
46 #endif
48  }
49  av_freep(&s->thread_data);
52  av_freep(&s->top_nnz);
53  av_freep(&s->top_border);
54 
55  s->macroblocks = NULL;
56 }
57 
58 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
59 {
60  int ret;
61  if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
62  ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
63  return ret;
64  if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
66  return AVERROR(ENOMEM);
67  }
68  return 0;
69 }
70 
/*
 * NOTE(review): the declarator line and every statement of this definition
 * are absent from this listing (the embedded listing numbers jump
 * 70 -> 72 -> 75). Presumably this is vp8_release_frame(), which other
 * functions in this file call with a VP8Context pointer and a frame
 * pointer -- restore it from the original source before relying on it.
 */
72 {
75 }
76 
/*
 * Make dst reference the same picture and segmentation map as src.
 *
 * dst is released first. The picture is then referenced through
 * ff_thread_ref_frame() and, if src has a segmentation map, a new reference
 * to it is stored in dst. Returns 0 on success, the negative error code of
 * ff_thread_ref_frame(), or AVERROR(ENOMEM) when av_buffer_ref() fails (dst
 * is released again in that case).
 *
 * NOTE(review): the declarator line is absent from this listing (listing
 * numbers jump 76 -> 78). The body uses `s`, `dst` and `src` and returns an
 * int; the name is presumably vp8_ref_frame -- confirm.
 */
78 {
79  int ret;
80 
81  vp8_release_frame(s, dst);
82 
83  if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
84  return ret;
85  if (src->seg_map &&
86  !(dst->seg_map = av_buffer_ref(src->seg_map))) {
/* undo the picture reference taken above */
87  vp8_release_frame(s, dst);
88  return AVERROR(ENOMEM);
89  }
90 
91  return 0;
92 }
93 
94 
95 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
96 {
97  VP8Context *s = avctx->priv_data;
98  int i;
99 
100  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
101  vp8_release_frame(s, &s->frames[i]);
102  memset(s->framep, 0, sizeof(s->framep));
103 
104  if (free_mem)
105  free_buffers(s);
106 }
107 
108 static void vp8_decode_flush(AVCodecContext *avctx)
109 {
110  vp8_decode_flush_impl(avctx, 0);
111 }
112 
/*
 * (Re)configure the decoder for a frame of the given size.
 *
 * If the requested size differs from the codec context's size (or the
 * macroblock counts changed while a macroblock array exists), the new size is
 * applied with ff_set_dimensions(). The macroblock counts are then derived
 * from the coded dimensions and the size-dependent work buffers are
 * allocated.
 *
 * Returns 0 on success, the negative error code of ff_set_dimensions(), or
 * AVERROR(ENOMEM) if an allocation failed.
 *
 * NOTE(review): listing numbers 120, 130, 133, 139 and 142 are absent from
 * this listing, so statements are missing. As listed, nothing releases the
 * previous buffers before they are re-allocated, nothing allocates
 * s->thread_data or s->intra4x4_pred_mode_top although both are used or
 * checked below, and the local `avctx` is unused. Confirm against the
 * original source.
 */
113 static int update_dimensions(VP8Context *s, int width, int height)
114 {
115  AVCodecContext *avctx = s->avctx;
116  int i, ret;
117 
/* `&&` binds tighter than `||`: the macroblock-count comparison only matters
 * when a macroblock array has already been allocated */
118  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
119  height != s->avctx->height) {
121 
122  ret = ff_set_dimensions(s->avctx, width, height);
123  if (ret < 0)
124  return ret;
125  }
126 
/* size in macroblocks, rounded up to cover partial 16x16 blocks */
127  s->mb_width = (s->avctx->coded_width +15) / 16;
128  s->mb_height = (s->avctx->coded_height+15) / 16;
129 
131  if (!s->mb_layout) { // Frame threading and one thread
132  s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
134  }
135  else // Sliced threading
136  s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
137  s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
138  s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
140 
/* NOTE(review): s->thread_data is dereferenced here although no allocation
 * of it is visible in this listing */
141  for (i = 0; i < MAX_THREADS; i++) {
143 #if HAVE_THREADS
144  pthread_mutex_init(&s->thread_data[i].lock, NULL);
145  pthread_cond_init(&s->thread_data[i].cond, NULL);
146 #endif
147  }
148 
/* the intra4x4 top-mode row is only required when mb_layout is 0 */
149  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
150  (!s->intra4x4_pred_mode_top && !s->mb_layout))
151  return AVERROR(ENOMEM);
152 
/* skip the first element so that index -1 of s->macroblocks is addressable */
153  s->macroblocks = s->macroblocks_base + 1;
154 
155  return 0;
156 }
157 
/*
 * Parse segmentation data from the header range coder s->c.
 *
 * Visible behaviour: one flag bit selects whether segment feature data
 * follows; when s->segmentation.update_map is set, three segment-id tree
 * probabilities are read, each either an explicit 8-bit value (preceded by a
 * set flag bit) or the default 255.
 *
 * NOTE(review): the declarator line and several statements are absent from
 * this listing (listing numbers 158, 163, 166, 169 and 172 are missing). In
 * particular both `for` loops below have lost their bodies. The function is
 * presumably parse_segment_info(VP8Context *s) -- confirm against the
 * original source.
 */
159 {
160  VP56RangeCoder *c = &s->c;
161  int i;
162 
164 
165  if (vp8_rac_get(c)) { // update segment feature data
167 
168  for (i = 0; i < 4; i++)
170 
171  for (i = 0; i < 4; i++)
173  }
174  if (s->segmentation.update_map)
175  for (i = 0; i < 3; i++)
176  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
177 }
178 
/*
 * Read loop-filter delta updates from the header range coder s->c.
 *
 * For each of the four s->lf_delta.ref entries, and then for each
 * s->lf_delta.mode entry from MODE_I4x4 to VP8_MVMODE_SPLIT, one flag bit
 * tells whether the entry is updated. An update is a 6-bit magnitude
 * followed by a sign bit (set means negative). Entries without an update
 * keep their previous value.
 *
 * NOTE(review): the declarator line is absent from this listing (listing
 * numbers jump 177 -> 180). The function is called as update_lf_deltas(s)
 * from decode_frame_header(); presumably `static void
 * update_lf_deltas(VP8Context *s)` -- confirm.
 */
180 {
181  VP56RangeCoder *c = &s->c;
182  int i;
183 
184  for (i = 0; i < 4; i++) {
185  if (vp8_rac_get(c)) {
186  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
187 
188  if (vp8_rac_get(c))
189  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
190  }
191  }
192 
193  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
194  if (vp8_rac_get(c)) {
195  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
196 
197  if (vp8_rac_get(c))
198  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
199  }
200  }
201 }
202 
203 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
204 {
205  const uint8_t *sizes = buf;
206  int i;
207 
208  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
209 
210  buf += 3*(s->num_coeff_partitions-1);
211  buf_size -= 3*(s->num_coeff_partitions-1);
212  if (buf_size < 0)
213  return -1;
214 
215  for (i = 0; i < s->num_coeff_partitions-1; i++) {
216  int size = AV_RL24(sizes + 3*i);
217  if (buf_size - size < 0)
218  return -1;
219 
220  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
221  buf += size;
222  buf_size -= size;
223  }
224  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
225 
226  return 0;
227 }
228 
229 static void get_quants(VP8Context *s)
230 {
231  VP56RangeCoder *c = &s->c;
232  int i, base_qi;
233 
234  int yac_qi = vp8_rac_get_uint(c, 7);
235  int ydc_delta = vp8_rac_get_sint(c, 4);
236  int y2dc_delta = vp8_rac_get_sint(c, 4);
237  int y2ac_delta = vp8_rac_get_sint(c, 4);
238  int uvdc_delta = vp8_rac_get_sint(c, 4);
239  int uvac_delta = vp8_rac_get_sint(c, 4);
240 
241  for (i = 0; i < 4; i++) {
242  if (s->segmentation.enabled) {
243  base_qi = s->segmentation.base_quant[i];
244  if (!s->segmentation.absolute_vals)
245  base_qi += yac_qi;
246  } else
247  base_qi = yac_qi;
248 
249  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
250  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
251  s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
252  /* 101581>>16 is equivalent to 155/100 */
253  s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
254  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
255  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
256 
257  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
258  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
259  }
260 }
261 
262 /**
263  * Determine which buffers golden and altref should be updated with after this frame.
264  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
265  *
266  * Intra frames update all 3 references
267  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
268  * If the update (golden|altref) flag is set, it's updated with the current frame
269  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
270  * If the flag is not set, the number read means:
271  * 0: no update
272  * 1: VP56_FRAME_PREVIOUS
273  * 2: update golden with altref, or update altref with golden
274  */
/*
 * Decide which frame a reference buffer is refreshed from.
 *
 * When `update` is set the current frame is used and nothing is read from
 * the bitstream. Otherwise a 2-bit code is read from s->c: 1 selects the
 * previous frame; any value that does not return from the switch yields
 * VP56_FRAME_NONE.
 *
 * NOTE(review): the declarator line and the statement of `case 2` are absent
 * from this listing (listing numbers 275 and 286 are missing). The function
 * is called as ref_to_update(s, update_flag, VP56_FRAME_GOLDEN/GOLDEN2) by
 * update_refs() -- confirm the signature and the `case 2` result against the
 * original source.
 */
276 {
277  VP56RangeCoder *c = &s->c;
278 
279  if (update)
280  return VP56_FRAME_CURRENT;
281 
282  switch (vp8_rac_get_uint(c, 2)) {
283  case 1:
284  return VP56_FRAME_PREVIOUS;
285  case 2:
287  }
288  return VP56_FRAME_NONE;
289 }
290 
291 static void update_refs(VP8Context *s)
292 {
293  VP56RangeCoder *c = &s->c;
294 
295  int update_golden = vp8_rac_get(c);
296  int update_altref = vp8_rac_get(c);
297 
298  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
299  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
300 }
301 
/*
 * Parse the frame header found at buf.
 *
 * The first three bytes hold the frame tag: bit 0 clear means keyframe,
 * bits 1-3 are the profile, bit 4 clear means the frame is invisible, and
 * the remaining 19 bits are the size of the first partition. Keyframes
 * continue with a 3-byte start code and the 14-bit width and height, each
 * with a 2-bit scale. Everything else is read through the range coder s->c
 * that is initialised over the first partition.
 *
 * Returns 0 on success, AVERROR_INVALIDDATA for a bad start code, a header
 * larger than the data, or invalid partitions, or the error returned by
 * update_dimensions().
 *
 * NOTE(review): listing numbers 322, 344, 367, 394, 409 and 433 are absent
 * from this listing, so six statements are missing; each gap is marked
 * below. Confirm against the original source.
 * NOTE(review): buf[0..2] are read before buf_size is looked at -- confirm
 * that callers guarantee at least 3 bytes.
 */
302 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
303 {
304  VP56RangeCoder *c = &s->c;
305  int header_size, hscale, vscale, i, j, k, l, m, ret;
306  int width = s->avctx->width;
307  int height = s->avctx->height;
308 
309  s->keyframe = !(buf[0] & 1);
310  s->profile = (buf[0]>>1) & 7;
311  s->invisible = !(buf[0] & 0x10);
312  header_size = AV_RL24(buf) >> 5;
313  buf += 3;
314  buf_size -= 3;
315 
316  if (s->profile > 3)
317  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
318 
319  if (!s->profile)
320  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
321  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
/* NOTE(review): the statement of this `else` (listing number 322) is
 * missing; as listed, the size check below would become its body */
323 
/* a keyframe needs 7 more bytes (start code + dimensions) before the
 * first partition */
324  if (header_size > buf_size - 7*s->keyframe) {
325  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
326  return AVERROR_INVALIDDATA;
327  }
328 
329  if (s->keyframe) {
330  if (AV_RL24(buf) != 0x2a019d) {
331  av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
332  return AVERROR_INVALIDDATA;
333  }
334  width = AV_RL16(buf+3) & 0x3fff;
335  height = AV_RL16(buf+5) & 0x3fff;
336  hscale = buf[4] >> 6;
337  vscale = buf[6] >> 6;
338  buf += 7;
339  buf_size -= 7;
340 
341  if (hscale || vscale)
342  avpriv_request_sample(s->avctx, "Upscaling");
343 
/* NOTE(review): listing number 344 is missing here */
/* keyframes reset probabilities, segmentation and loop-filter deltas to
 * their defaults */
345  for (i = 0; i < 4; i++)
346  for (j = 0; j < 16; j++)
347  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
348  sizeof(s->prob->token[i][j]));
349  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
350  memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
351  memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
352  memset(&s->segmentation, 0, sizeof(s->segmentation));
353  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
354  }
355 
356  ff_vp56_init_range_decoder(c, buf, header_size);
357  buf += header_size;
358  buf_size -= header_size;
359 
360  if (s->keyframe) {
361  if (vp8_rac_get(c))
362  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
363  vp8_rac_get(c); // whether we can skip clamping in dsp functions
364  }
365 
/* NOTE(review): the statement executed when segmentation is enabled
 * (listing number 367) is missing */
366  if ((s->segmentation.enabled = vp8_rac_get(c)))
368  else
369  s->segmentation.update_map = 0; // FIXME: move this to some init function?
370 
371  s->filter.simple = vp8_rac_get(c);
372  s->filter.level = vp8_rac_get_uint(c, 6);
373  s->filter.sharpness = vp8_rac_get_uint(c, 3);
374 
/* deltas are only parsed when they are enabled and an update is signalled */
375  if ((s->lf_delta.enabled = vp8_rac_get(c)))
376  if (vp8_rac_get(c))
377  update_lf_deltas(s);
378 
379  if (setup_partitions(s, buf, buf_size)) {
380  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
381  return AVERROR_INVALIDDATA;
382  }
383 
384  if (!s->macroblocks_base || /* first frame */
385  width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
386  if ((ret = update_dimensions(s, width, height)) < 0)
387  return ret;
388  }
389 
390  get_quants(s);
391 
392  if (!s->keyframe) {
393  update_refs(s);
/* NOTE(review): listing number 394 is missing here */
395  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
396  }
397 
398  // if we aren't saving this frame's probabilities for future frames,
399  // make a copy of the current probabilities
400  if (!(s->update_probabilities = vp8_rac_get(c)))
401  s->prob[1] = s->prob[0];
402 
403  s->update_last = s->keyframe || vp8_rac_get(c);
404 
/* token probability updates: a value read for band j is stored at every
 * coefficient position listed for that band in vp8_coeff_band_indexes */
405  for (i = 0; i < 4; i++)
406  for (j = 0; j < 8; j++)
407  for (k = 0; k < 3; k++)
408  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
/* NOTE(review): listing number 409, which opened the block closed at
 * listing number 413, is missing */
410  int prob = vp8_rac_get_uint(c, 8);
411  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
412  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
413  }
414 
415  if ((s->mbskip_enabled = vp8_rac_get(c)))
416  s->prob->mbskip = vp8_rac_get_uint(c, 8);
417 
418  if (!s->keyframe) {
419  s->prob->intra = vp8_rac_get_uint(c, 8);
420  s->prob->last = vp8_rac_get_uint(c, 8);
421  s->prob->golden = vp8_rac_get_uint(c, 8);
422 
423  if (vp8_rac_get(c))
424  for (i = 0; i < 4; i++)
425  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
426  if (vp8_rac_get(c))
427  for (i = 0; i < 3; i++)
428  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
429 
430  // 17.2 MV probability update
431  for (i = 0; i < 2; i++)
432  for (j = 0; j < 19; j++)
/* NOTE(review): listing number 433 is missing; as listed every
 * probability is overwritten unconditionally */
434  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
435  }
436 
437  return 0;
438 }
439 
440 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
441 {
442  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
443  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
444 }
445 
446 /**
447  * Motion vector coding, 17.1.
448  */
/*
 * Read one signed motion vector component from range coder c using the
 * probability array p.
 *
 * p[0] selects between the long form and the small tree. In the long form
 * the magnitude bits are read with p[9 + bit]: bits 0-2 first, then bits 9
 * down to 4, and finally bit 3. In the short form a 3-bit value is read
 * through a tree starting at p[2]. p[1] is the sign probability and is only
 * consulted for a non-zero magnitude.
 *
 * NOTE(review): the declarator line is absent from this listing (listing
 * number 449 is missing). Callers pass a VP56RangeCoder pointer and a row of
 * s->prob->mvc and add the result to an MV component -- confirm the exact
 * signature against the original source.
 */
450 {
451  int bit, x = 0;
452 
453  if (vp56_rac_get_prob_branchy(c, p[0])) {
454  int i;
455 
456  for (i = 0; i < 3; i++)
457  x += vp56_rac_get_prob(c, p[9 + i]) << i;
458  for (i = 9; i > 3; i--)
459  x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* bit 3 is implied when no bit above it is set; otherwise it is read */
460  if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
461  x += 8;
462  } else {
463  // small_mvtree
464  const uint8_t *ps = p+2;
465  bit = vp56_rac_get_prob(c, *ps);
466  ps += 1 + 3*bit;
467  x += 4*bit;
468  bit = vp56_rac_get_prob(c, *ps);
469  ps += 1 + bit;
470  x += 2*bit;
471  x += vp56_rac_get_prob(c, *ps);
472  }
473 
474  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
475 }
476 
477 static av_always_inline
478 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
479 {
480  if (left == top)
481  return vp8_submv_prob[4-!!left];
482  if (!top)
483  return vp8_submv_prob[2];
484  return vp8_submv_prob[1-!!left];
485 }
486 
487 /**
488  * Split motion vector prediction, 16.4.
489  * @returns the number of motion vectors parsed (2, 4 or 16)
490  */
/*
 * NOTE(review): the declarator line (listing number 492) and listing numbers
 * 512-514, which hold the start of the partition-type decision, are absent
 * from this listing; only the trailing `else` branches of that decision are
 * visible. The function is called as decode_splitmvs(s, c, mb, layout) by
 * decode_mvs() -- confirm against the original source.
 *
 * Visible behaviour: after the partitioning is chosen, each of the `num`
 * partitions gets its vector from one of four sources selected by a tree
 * read with the probabilities from get_submv_prob(): the left neighbour, the
 * above neighbour, zero, or a newly read vector added to mb->mv. The
 * neighbours are taken from the current macroblock where the sub-block is
 * not on its left/top edge, and from the left/top macroblock otherwise.
 */
491 static av_always_inline
493 {
494  int part_idx;
495  int n, num;
496  VP8Macroblock *top_mb;
497  VP8Macroblock *left_mb = &mb[-1];
498  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
499  *mbsplits_top,
500  *mbsplits_cur, *firstidx;
501  VP56mv *top_mv;
502  VP56mv *left_mv = left_mb->bmv;
503  VP56mv *cur_mv = mb->bmv;
504 
505  if (!layout) // layout is inlined, s->mb_layout is not
506  top_mb = &mb[2];
507  else
508  top_mb = &mb[-s->mb_width-1];
509  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
510  top_mv = top_mb->bmv;
511 
515  } else {
516  part_idx = VP8_SPLITMVMODE_8x8;
517  }
518  } else {
519  part_idx = VP8_SPLITMVMODE_4x4;
520  }
521 
522  num = vp8_mbsplit_count[part_idx];
523  mbsplits_cur = vp8_mbsplits[part_idx],
524  firstidx = vp8_mbfirstidx[part_idx];
525  mb->partitioning = part_idx;
526 
527  for (n = 0; n < num; n++) {
/* k is the index of the first 4x4 sub-block of partition n */
528  int k = firstidx[n];
529  uint32_t left, above;
530  const uint8_t *submv_prob;
531 
/* leftmost column: take the vector from the left macroblock */
532  if (!(k & 3))
533  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
534  else
535  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
/* top row: take the vector from the macroblock above */
536  if (k <= 3)
537  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
538  else
539  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
540 
541  submv_prob = get_submv_prob(left, above);
542 
543  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
544  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
545  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
546  mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
547  mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
548  } else {
549  AV_ZERO32(&mb->bmv[n]);
550  }
551  } else {
552  AV_WN32A(&mb->bmv[n], above);
553  }
554  } else {
555  AV_WN32A(&mb->bmv[n], left);
556  }
557  }
558 
559  return num;
560 }
561 
/*
 * Decode the inter prediction mode and motion vector of a macroblock.
 *
 * The top, left and top-left neighbours are examined to collect up to three
 * distinct candidate vectors in near_mv[1..3], together with weights in
 * cnt[] (top and left count 2, top-left counts 1). The weights select the
 * probabilities used to read the mode tree: zero, nearest, near, new or
 * split. mb->mode, mb->mv and mb->bmv[0] are set on every path.
 *
 * The position of the neighbours relative to `mb` depends on `layout`; both
 * variants are spelled out below.
 *
 * NOTE(review): listing number 615 is absent from this listing, so one
 * statement between the edge checks and the mode tree is missing. mb_x and
 * mb_y are unused in the visible code.
 */
562 static av_always_inline
563 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
564 {
565  VP8Macroblock *mb_edge[3] = { 0 /* top */,
566  mb - 1 /* left */,
567  0 /* top-left */ };
568  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
569  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
570  int idx = CNT_ZERO;
571  int cur_sign_bias = s->sign_bias[mb->ref_frame];
572  int8_t *sign_bias = s->sign_bias;
573  VP56mv near_mv[4];
574  uint8_t cnt[4] = { 0 };
575  VP56RangeCoder *c = &s->c;
576 
577  if (!layout) { // layout is inlined (s->mb_layout is not)
578  mb_edge[0] = mb + 2;
579  mb_edge[2] = mb + 1;
580  }
581  else {
582  mb_edge[0] = mb - s->mb_width-1;
583  mb_edge[2] = mb - s->mb_width-2;
584  }
585 
/* near_mv[3] is left uninitialised; it is only written by the edge checks */
586  AV_ZERO32(&near_mv[0]);
587  AV_ZERO32(&near_mv[1]);
588  AV_ZERO32(&near_mv[2]);
589 
590  /* Process MB on top, left and top-left */
591  #define MV_EDGE_CHECK(n)\
592  {\
593  VP8Macroblock *edge = mb_edge[n];\
594  int edge_ref = edge->ref_frame;\
595  if (edge_ref != VP56_FRAME_CURRENT) {\
596  uint32_t mv = AV_RN32A(&edge->mv);\
597  if (mv) {\
598  if (cur_sign_bias != sign_bias[edge_ref]) {\
599  /* SWAR negate of the values in mv. */\
600  mv = ~mv;\
601  mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
602  }\
603  if (!n || mv != AV_RN32A(&near_mv[idx]))\
604  AV_WN32A(&near_mv[++idx], mv);\
605  cnt[idx] += 1 + (n != 2);\
606  } else\
607  cnt[CNT_ZERO] += 1 + (n != 2);\
608  }\
609  }
610 
611  MV_EDGE_CHECK(0)
612  MV_EDGE_CHECK(1)
613  MV_EDGE_CHECK(2)
614 
616  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
617  mb->mode = VP8_MVMODE_MV;
618 
619  /* If we have three distinct MVs, merge first and last if they're the same */
620  if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
621  cnt[CNT_NEAREST] += 1;
622 
623  /* Swap near and nearest if necessary */
624  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
625  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
626  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
627  }
628 
629  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
630  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
631 
632  /* Choose the best mv out of 0,0 and the nearest mv */
633  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* cnt[CNT_SPLITMV] is reused here as the context for the split decision:
 * left and top neighbours weigh 2, top-left weighs 1 */
634  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
635  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
636  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
637 
638  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
639  mb->mode = VP8_MVMODE_SPLIT;
/* the macroblock vector becomes the last vector that was parsed */
640  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
641  } else {
642  mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
643  mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
644  mb->bmv[0] = mb->mv;
645  }
646  } else {
647  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
648  mb->bmv[0] = mb->mv;
649  }
650  } else {
651  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
652  mb->bmv[0] = mb->mv;
653  }
654  } else {
655  mb->mode = VP8_MVMODE_ZERO;
656  AV_ZERO32(&mb->mv);
657  mb->bmv[0] = mb->mv;
658  }
659 }
660 
/*
 * Read the sixteen 4x4 intra prediction modes of a macroblock into
 * mb->intra4x4_pred_mode_mb.
 *
 * On keyframes each mode is read with probabilities that depend on the modes
 * of the blocks above and to the left; those contexts (`top` and `left`) are
 * updated with every decoded mode. With `layout` set, the top context lives
 * in the macroblock itself and is first copied from the macroblock above;
 * otherwise it lives in s->intra4x4_pred_mode_top at column mb_x.
 *
 * NOTE(review): the first declarator line (listing number 662) and the body
 * of the non-keyframe loop (listing number 691) are absent from this
 * listing. The function is called as decode_intra4x4_modes(s, c, mb, mb_x,
 * keyframe, layout) by decode_mb_mode() -- confirm against the original
 * source.
 */
661 static av_always_inline
663  int mb_x, int keyframe, int layout)
664 {
665  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
666 
667  if (layout) {
668  VP8Macroblock *mb_top = mb - s->mb_width - 1;
669  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
670  }
671  if (keyframe) {
672  int x, y;
673  uint8_t* top;
674  uint8_t* const left = s->intra4x4_pred_mode_left;
675  if (layout)
676  top = mb->intra4x4_pred_mode_top;
677  else
678  top = s->intra4x4_pred_mode_top + 4 * mb_x;
679  for (y = 0; y < 4; y++) {
680  for (x = 0; x < 4; x++) {
681  const uint8_t *ctx;
682  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
683  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* the decoded mode becomes the context of the blocks below and right */
684  left[y] = top[x] = *intra4x4;
685  intra4x4++;
686  }
687  }
688  } else {
689  int i;
690  for (i = 0; i < 16; i++)
692  }
693 }
694 
/*
 * Decode the per-macroblock mode information: segment id, skip flag and the
 * intra or inter prediction mode.
 *
 * The segment id is read from the bitstream when the segment map is being
 * updated. Otherwise, with segmentation enabled, it is taken from *ref when
 * ref is non-NULL and left unchanged when it is NULL. The skip flag is only
 * read when s->mbskip_enabled is set.
 *
 * NOTE(review): listing numbers 711, 724-725, 732, 739 and 743-746 are
 * absent from this listing, so several statements are missing -- among them
 * whatever sets mb->mode for intra macroblocks and the `else` branch that
 * selects the reference frame. Confirm against the original source.
 */
695 static av_always_inline
696 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
697  uint8_t *segment, uint8_t *ref, int layout)
698 {
699  VP56RangeCoder *c = &s->c;
700 
701  if (s->segmentation.update_map) {
/* two-level tree: the first bit selects the pair, the second the member */
702  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
703  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
704  } else if (s->segmentation.enabled)
705  *segment = ref ? *ref : *segment;
706  mb->segment = *segment;
707 
708  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
709 
710  if (s->keyframe) {
712 
713  if (mb->mode == MODE_I4x4) {
714  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
715  } else {
/* not I4x4: fill the 4x4 mode contexts with the mode derived from the
 * macroblock mode, replicated into all four bytes */
716  const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717  if (s->mb_layout)
718  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
719  else
720  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721  AV_WN32A( s->intra4x4_pred_mode_left, modes);
722  }
723 
726  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
727  // inter MB, 16.2
728  if (vp56_rac_get_prob_branchy(c, s->prob->last))
729  mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
730  VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
731  else
/* NOTE(review): the statement of this `else` (listing number 732) is
 * missing; as listed, the counter update below would become its body */
733  s->ref_count[mb->ref_frame-1]++;
734 
735  // motion vectors, 16.3
736  decode_mvs(s, mb, mb_x, mb_y, layout);
737  } else {
738  // intra MB, 16.1
740 
741  if (mb->mode == MODE_I4x4)
742  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
743 
747  AV_ZERO32(&mb->bmv[0]);
748  }
749 }
750 
751 #ifndef decode_block_coeffs_internal
752 /**
753  * @param r arithmetic bitstream reader context
754  * @param block destination for block coefficients
755  * @param probs probabilities to use when reading trees from the bitstream
756  * @param i initial coeff index, 0 unless a separate DC block is coded
757  * @param qmul array holding the dc/ac dequant factor at position 0/1
758  * @return 0 if no coeffs were decoded
759  * otherwise, the index of the last coeff decoded plus one
760  */
/*
 * NOTE(review): the first declarator line (listing number 761) is absent
 * from this listing. The function is called as
 * decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul) by
 * decode_block_coeffs() -- confirm the return type and the types of the
 * first two parameters against the original source.
 *
 * The range coder is copied into a local and written back on exit. Decoding
 * starts at skip_eob because the caller has already tested the
 * end-of-block token for the first coefficient.
 */
762  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763  int i, uint8_t *token_prob, int16_t qmul[2])
764 {
765  VP56RangeCoder c = *r;
766  goto skip_eob;
767  do {
768  int coeff;
769  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
770  break;
771 
772 skip_eob:
773  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
774  if (++i == 16)
775  break; // invalid input; blocks should end with EOB
/* after a zero token no end-of-block test is made for the next position */
776  token_prob = probs[i][0];
777  goto skip_eob;
778  }
779 
780  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
781  coeff = 1;
782  token_prob = probs[i+1][1];
783  } else {
784  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
786  if (coeff)
787  coeff += vp56_rac_get_prob(&c, token_prob[5]);
788  coeff += 2;
789  } else {
790  // DCT_CAT*
791  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
794  } else { // DCT_CAT2
795  coeff = 7;
796  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
798  }
799  } else { // DCT_CAT3 and up
800  int a = vp56_rac_get_prob(&c, token_prob[8]);
801  int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802  int cat = (a<<1) + b;
/* base value of the category: 11, 19, 35 or 67 for cat 0..3 */
803  coeff = 3 + (8<<cat);
804  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
805  }
806  }
807  token_prob = probs[i+1][2];
808  }
/* sign bit, then dequantize: qmul[0] for index 0, qmul[1] for all others */
809  block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
810  } while (++i < 16);
811 
812  *r = c;
813  return i;
814 }
815 #endif
816 
817 /**
818  * @param c arithmetic bitstream reader context
819  * @param block destination for block coefficients
820  * @param probs probabilities to use when reading trees from the bitstream
821  * @param i initial coeff index, 0 unless a separate DC block is coded
822  * @param zero_nhood the initial prediction context for number of surrounding
823  * all-zero blocks (only left/top, so 0-2)
824  * @param qmul array holding the dc/ac dequant factor at position 0/1
825  * @return 0 if no coeffs were decoded
826  * otherwise, the index of the last coeff decoded plus one
827  */
/*
 * Tests the end-of-block token for the first coefficient here and returns 0
 * if the block is empty; otherwise the remaining work is done by
 * decode_block_coeffs_internal().
 *
 * NOTE(review): the first declarator line (listing number 829) is absent
 * from this listing. The body uses `c` and `block` as the first two
 * arguments and returns an int -- confirm their types against the original
 * source.
 */
828 static av_always_inline
830  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831  int i, int zero_nhood, int16_t qmul[2])
832 {
833  uint8_t *token_prob = probs[i][zero_nhood];
834  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
835  return 0;
836  return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
837 }
838 
/*
 * Decode all residual coefficients of one macroblock.
 *
 * For modes other than I4x4 and split-MV a separate luma DC block is decoded
 * first and the luma blocks then start at coefficient index 1. The sixteen
 * luma blocks and the eight chroma blocks follow. t_nnz and l_nnz hold the
 * "has non-zero coefficients" flags of the neighbours above and to the left
 * and are updated as blocks are decoded; index 8 belongs to the DC block,
 * 0-3 to luma and 4-7 to chroma. The per-block results are stored in
 * td->non_zero_count_cache. If nothing at all was coded, mb->skip is set.
 *
 * NOTE(review): the first declarator line (listing number 840) and the
 * statement executed when the DC block has a single coefficient (listing
 * number 859) are absent from this listing. The function is presumably
 * decode_mb_coeffs -- confirm against the original source.
 */
839 static av_always_inline
841  uint8_t t_nnz[9], uint8_t l_nnz[9])
842 {
843  int i, x, y, luma_start = 0, luma_ctx = 3;
844  int nnz_pred, nnz, nnz_total = 0;
845  int segment = mb->segment;
846  int block_dc = 0;
847 
848  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
849  nnz_pred = t_nnz[8] + l_nnz[8];
850 
851  // decode DC values and do hadamard
852  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
853  s->qmat[segment].luma_dc_qmul);
854  l_nnz[8] = t_nnz[8] = !!nnz;
855  if (nnz) {
856  nnz_total += nnz;
857  block_dc = 1;
858  if (nnz == 1)
860  else
861  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
862  }
/* DC is carried by the separate block, so luma blocks start at index 1 */
863  luma_start = 1;
864  luma_ctx = 0;
865  }
866 
867  // luma blocks
868  for (y = 0; y < 4; y++)
869  for (x = 0; x < 4; x++) {
870  nnz_pred = l_nnz[y] + t_nnz[x];
871  nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872  nnz_pred, s->qmat[segment].luma_qmul);
873  // nnz+block_dc may be one more than the actual last index, but we don't care
874  td->non_zero_count_cache[y][x] = nnz + block_dc;
875  t_nnz[x] = l_nnz[y] = !!nnz;
876  nnz_total += nnz;
877  }
878 
879  // chroma blocks
880  // TODO: what to do about dimensions? 2nd dim for luma is x,
881  // but for chroma it's (y<<1)|x
882  for (i = 4; i < 6; i++)
883  for (y = 0; y < 2; y++)
884  for (x = 0; x < 2; x++) {
885  nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886  nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
887  nnz_pred, s->qmat[segment].chroma_qmul);
888  td->non_zero_count_cache[i][(y<<1)+x] = nnz;
889  t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
890  nnz_total += nnz;
891  }
892 
893  // if there were no coded coeffs despite the macroblock not being marked skip,
894  // we MUST not do the inner loop filter and should not do IDCT
895  // Since skip isn't used for bitstream prediction, just manually set it.
896  if (!nnz_total)
897  mb->skip = 1;
898 }
899 
900 static av_always_inline
901 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902  int linesize, int uvlinesize, int simple)
903 {
904  AV_COPY128(top_border, src_y + 15*linesize);
905  if (!simple) {
906  AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907  AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
908  }
909 }
910 
911 static av_always_inline
912 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913  int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914  int simple, int xchg)
915 {
916  uint8_t *top_border_m1 = top_border-32; // for TL prediction
917  src_y -= linesize;
918  src_cb -= uvlinesize;
919  src_cr -= uvlinesize;
920 
921 #define XCHG(a,b,xchg) do { \
922  if (xchg) AV_SWAP64(b,a); \
923  else AV_COPY64(b,a); \
924  } while (0)
925 
926  XCHG(top_border_m1+8, src_y-8, xchg);
927  XCHG(top_border, src_y, xchg);
928  XCHG(top_border+8, src_y+8, 1);
929  if (mb_x < mb_width-1)
930  XCHG(top_border+32, src_y+16, 1);
931 
932  // only copy chroma for normal loop filter
933  // or to initialize the top row to 127
934  if (!simple || !mb_y) {
935  XCHG(top_border_m1+16, src_cb-8, xchg);
936  XCHG(top_border_m1+24, src_cr-8, xchg);
937  XCHG(top_border+16, src_cb, 1);
938  XCHG(top_border+24, src_cr, 1);
939  }
940 }
941 
942 static av_always_inline
943 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
944 {
945  if (!mb_x) {
946  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
947  } else {
948  return mb_y ? mode : LEFT_DC_PRED8x8;
949  }
950 }
951 
952 static av_always_inline
953 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
954 {
955  if (!mb_x) {
956  return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
957  } else {
958  return mb_y ? mode : HOR_PRED8x8;
959  }
960 }
961 
962 static av_always_inline
963 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
964 {
965  switch (mode) {
966  case DC_PRED8x8:
967  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
968  case VERT_PRED8x8:
969  return !mb_y ? DC_127_PRED8x8 : mode;
970  case HOR_PRED8x8:
971  return !mb_x ? DC_129_PRED8x8 : mode;
972  case PLANE_PRED8x8 /*TM*/:
973  return check_tm_pred8x8_mode(mode, mb_x, mb_y);
974  }
975  return mode;
976 }
977 
978 static av_always_inline
979 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
980 {
981  if (!mb_x) {
982  return mb_y ? VERT_VP8_PRED : DC_129_PRED;
983  } else {
984  return mb_y ? mode : HOR_VP8_PRED;
985  }
986 }
987 
988 static av_always_inline
989 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
990 {
991  switch (mode) {
992  case VERT_PRED:
993  if (!mb_x && mb_y) {
994  *copy_buf = 1;
995  return mode;
996  }
997  /* fall-through */
998  case DIAG_DOWN_LEFT_PRED:
999  case VERT_LEFT_PRED:
1000  return !mb_y ? DC_127_PRED : mode;
1001  case HOR_PRED:
1002  if (!mb_y) {
1003  *copy_buf = 1;
1004  return mode;
1005  }
1006  /* fall-through */
1007  case HOR_UP_PRED:
1008  return !mb_x ? DC_129_PRED : mode;
1009  case TM_VP8_PRED:
1010  return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1011  case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1012  case DIAG_DOWN_RIGHT_PRED:
1013  case VERT_RIGHT_PRED:
1014  case HOR_DOWN_PRED:
1015  if (!mb_y || !mb_x)
1016  *copy_buf = 1;
1017  return mode;
1018  }
1019  return mode;
1020 }
1021 
/*
 * Intra-predict one macroblock into dst[0..2] (luma, then the two chroma
 * planes) and, for 4x4 prediction, add the residual of each block right
 * after predicting it.
 *
 * For whole-macroblock luma modes (mb->mode < MODE_I4x4) the mode is
 * adjusted for frame edges and the 16x16 predictor is called. Otherwise
 * each of the sixteen 4x4 blocks is predicted on its own. When
 * check_intra_pred4x4_mode_emuedge() requests it, prediction runs in the
 * small buffer copy_dst (stride 8) whose top row and left column are filled
 * from the picture or with the constants 127 / 129, and the result is
 * copied back. Chroma always uses the 8x8 predictor for both planes.
 *
 * NOTE(review): the first declarator line (listing number 1023) and the
 * statement of `if (mb->skip)` (listing number 1057) are absent from this
 * listing. The function is presumably intra_predict -- confirm against the
 * original source.
 */
1022 static av_always_inline
1024  VP8Macroblock *mb, int mb_x, int mb_y)
1025 {
1026  int x, y, mode, nnz;
1027  uint32_t tr;
1028 
1029  // for the first row, we need to run xchg_mb_border to init the top edge to 127
1030  // otherwise, skip it if we aren't going to deblock
/* NOTE(review): the condition requires mb_y to be non-zero, so its `!mb_y`
 * alternative can never be true and the first row never reaches
 * xchg_mb_border(); the comment above looks out of date */
1031  if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1032  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1033  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1034  s->filter.simple, 1);
1035 
1036  if (mb->mode < MODE_I4x4) {
1037  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1038  s->hpc.pred16x16[mode](dst[0], s->linesize);
1039  } else {
1040  uint8_t *ptr = dst[0];
1041  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1042  uint8_t tr_top[4] = { 127, 127, 127, 127 };
1043 
1044  // all blocks on the right edge of the macroblock use the bottom edge
1045  // of the top macroblock for their topright edge
1046  uint8_t *tr_right = ptr - s->linesize + 16;
1047 
1048  // if we're on the right edge of the frame, said edge is extended
1049  // from the top macroblock
1050  if (mb_y &&
1051  mb_x == s->mb_width-1) {
1052  tr = tr_right[-1]*0x01010101u;
1053  tr_right = (uint8_t *)&tr;
1054  }
1055 
/* NOTE(review): the statement of this `if` (listing number 1057) is
 * missing; as listed, the loop below would become its body */
1056  if (mb->skip)
1058 
1059  for (y = 0; y < 4; y++) {
1060  uint8_t *topright = ptr + 4 - s->linesize;
1061  for (x = 0; x < 4; x++) {
1062  int copy = 0, linesize = s->linesize;
1063  uint8_t *dst = ptr+4*x;
1064  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1065 
1066  if ((y == 0 || x == 3) && mb_y == 0) {
1067  topright = tr_top;
1068  } else if (x == 3)
1069  topright = tr_right;
1070 
1071  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1072  if (copy) {
/* copy_dst is 5 rows of 8 bytes: the block starts at offset 12, its top
 * row at 4, its top-left pixel at 3 and its left column at 11/19/27/35 */
1073  dst = copy_dst + 12;
1074  linesize = 8;
1075  if (!(mb_y + y)) {
1076  copy_dst[3] = 127U;
1077  AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1078  } else {
1079  AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1080  if (!(mb_x + x)) {
1081  copy_dst[3] = 129U;
1082  } else {
1083  copy_dst[3] = ptr[4*x-s->linesize-1];
1084  }
1085  }
1086  if (!(mb_x + x)) {
1087  copy_dst[11] =
1088  copy_dst[19] =
1089  copy_dst[27] =
1090  copy_dst[35] = 129U;
1091  } else {
1092  copy_dst[11] = ptr[4*x -1];
1093  copy_dst[19] = ptr[4*x+s->linesize -1];
1094  copy_dst[27] = ptr[4*x+s->linesize*2-1];
1095  copy_dst[35] = ptr[4*x+s->linesize*3-1];
1096  }
1097  }
1098  s->hpc.pred4x4[mode](dst, topright, linesize);
1099  if (copy) {
1100  AV_COPY32(ptr+4*x , copy_dst+12);
1101  AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1102  AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1103  AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1104  }
1105 
/* a count of exactly 1 selects the DC-only inverse transform */
1106  nnz = td->non_zero_count_cache[y][x];
1107  if (nnz) {
1108  if (nnz == 1)
1109  s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1110  else
1111  s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1112  }
1113  topright += 4;
1114  }
1115 
1116  ptr += 4*s->linesize;
1117  intra4x4 += 4;
1118  }
1119  }
1120 
1121  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1122  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1123  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1124 
/* second border call with xchg = 0, under the same condition as the first */
1125  if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1126  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1127  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1128  s->filter.simple, 0);
1129 }
1130 
/*
 * Edge requirements of the subpel interpolation filters, indexed as
 * subpel_idx[row][frac] where frac is the 3-bit fractional part of a motion
 * vector component. The MC functions below use row 0 both as the number of
 * extra pixels needed before the block (left for x, above for y) and as the
 * index into the mc_func table, row 1 as the total number of extra pixels
 * added to the block size, and row 2 as the number of extra pixels needed
 * after the block (right / below).
 */
1131 static const uint8_t subpel_idx[3][8] = {
1132  { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1133  // also function pointer index
1134  { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1135  { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1136 };
1137 
1138 /**
1139  * luma MC function
1140  *
1141  * @param s VP8 decoding context
1142  * @param dst target buffer for block data at block position
1143  * @param ref reference picture buffer at origin (0, 0)
1144  * @param mv motion vector (relative to block position) to get pixel data from
1145  * @param x_off horizontal position of block from origin (0, 0)
1146  * @param y_off vertical position of block from origin (0, 0)
1147  * @param block_w width of block (16, 8 or 4)
1148  * @param block_h height of block (always same as block_w)
1149  * @param width width of src/dst plane data
1150  * @param height height of src/dst plane data
1151  * @param linesize size of a single line of plane data, including padding
1152  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1153  */
1154 static av_always_inline
1156  ThreadFrame *ref, const VP56mv *mv,
1157  int x_off, int y_off, int block_w, int block_h,
1158  int width, int height, ptrdiff_t linesize,
1159  vp8_mc_func mc_func[3][3])
1160 {
1161  uint8_t *src = ref->f->data[0];
1162 
1163  if (AV_RN32A(mv)) {
1164  int src_linesize = linesize;
1165 
1166  int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1167  int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1168 
1169  x_off += mv->x >> 2;
1170  y_off += mv->y >> 2;
1171 
1172  // edge emulation
1173  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1174  src += y_off * linesize + x_off;
1175  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1176  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1178  src - my_idx * linesize - mx_idx,
1179  EDGE_EMU_LINESIZE, linesize,
1180  block_w + subpel_idx[1][mx],
1181  block_h + subpel_idx[1][my],
1182  x_off - mx_idx, y_off - my_idx, width, height);
1183  src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1184  src_linesize = EDGE_EMU_LINESIZE;
1185  }
1186  mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1187  } else {
1188  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1189  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1190  }
1191 }
1192 
1193 /**
1194  * chroma MC function
1195  *
1196  * @param s VP8 decoding context
1197  * @param dst1 target buffer for block data at block position (U plane)
1198  * @param dst2 target buffer for block data at block position (V plane)
1199  * @param ref reference picture buffer at origin (0, 0)
1200  * @param mv motion vector (relative to block position) to get pixel data from
1201  * @param x_off horizontal position of block from origin (0, 0)
1202  * @param y_off vertical position of block from origin (0, 0)
1203  * @param block_w width of block (16, 8 or 4)
1204  * @param block_h height of block (always same as block_w)
1205  * @param width width of src/dst plane data
1206  * @param height height of src/dst plane data
1207  * @param linesize size of a single line of plane data, including padding
1208  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1209  */
1210 static av_always_inline
1212  ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1213  int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1214  vp8_mc_func mc_func[3][3])
1215 {
1216  uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1217 
1218  if (AV_RN32A(mv)) {
1219  int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1220  int my = mv->y&7, my_idx = subpel_idx[0][my];
1221 
1222  x_off += mv->x >> 3;
1223  y_off += mv->y >> 3;
1224 
1225  // edge emulation
1226  src1 += y_off * linesize + x_off;
1227  src2 += y_off * linesize + x_off;
1228  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1229  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1230  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1232  src1 - my_idx * linesize - mx_idx,
1233  EDGE_EMU_LINESIZE, linesize,
1234  block_w + subpel_idx[1][mx],
1235  block_h + subpel_idx[1][my],
1236  x_off - mx_idx, y_off - my_idx, width, height);
1237  src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1238  mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1239 
1241  src2 - my_idx * linesize - mx_idx,
1242  EDGE_EMU_LINESIZE, linesize,
1243  block_w + subpel_idx[1][mx],
1244  block_h + subpel_idx[1][my],
1245  x_off - mx_idx, y_off - my_idx, width, height);
1246  src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1247  mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1248  } else {
1249  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1250  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1251  }
1252  } else {
1253  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1254  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1255  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1256  }
1257 }
1258 
1259 static av_always_inline
1261  ThreadFrame *ref_frame, int x_off, int y_off,
1262  int bx_off, int by_off,
1263  int block_w, int block_h,
1264  int width, int height, VP56mv *mv)
1265 {
1266  VP56mv uvmv = *mv;
1267 
1268  /* Y */
1269  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1270  ref_frame, mv, x_off + bx_off, y_off + by_off,
1271  block_w, block_h, width, height, s->linesize,
1272  s->put_pixels_tab[block_w == 8]);
1273 
1274  /* U/V */
1275  if (s->profile == 3) {
1276  uvmv.x &= ~7;
1277  uvmv.y &= ~7;
1278  }
1279  x_off >>= 1; y_off >>= 1;
1280  bx_off >>= 1; by_off >>= 1;
1281  width >>= 1; height >>= 1;
1282  block_w >>= 1; block_h >>= 1;
1283  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1284  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1285  &uvmv, x_off + bx_off, y_off + by_off,
1286  block_w, block_h, width, height, s->uvlinesize,
1287  s->put_pixels_tab[1 + (block_w == 4)]);
1288 }
1289 
1290 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1291  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1292 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1293 {
1294  /* Don't prefetch refs that haven't been used very often this frame. */
1295  if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1296  int x_off = mb_x << 4, y_off = mb_y << 4;
1297  int mx = (mb->mv.x>>2) + x_off + 8;
1298  int my = (mb->mv.y>>2) + y_off;
1299  uint8_t **src= s->framep[ref]->tf.f->data;
1300  int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1301  /* For threading, a ff_thread_await_progress here might be useful, but
1302  * it actually slows down the decoder. Since a bad prefetch doesn't
1303  * generate bad decoder output, we don't run it here. */
1304  s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1305  off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1306  s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1307  }
1308 }
1309 
1310 /**
1311  * Apply motion vectors to prediction buffer, chapter 18.
1312  */
1313 static av_always_inline
1315  VP8Macroblock *mb, int mb_x, int mb_y)
1316 {
1317  int x_off = mb_x << 4, y_off = mb_y << 4;
1318  int width = 16*s->mb_width, height = 16*s->mb_height;
1319  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1320  VP56mv *bmv = mb->bmv;
1321 
1322  switch (mb->partitioning) {
1323  case VP8_SPLITMVMODE_NONE:
1324  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1325  0, 0, 16, 16, width, height, &mb->mv);
1326  break;
1327  case VP8_SPLITMVMODE_4x4: {
1328  int x, y;
1329  VP56mv uvmv;
1330 
1331  /* Y */
1332  for (y = 0; y < 4; y++) {
1333  for (x = 0; x < 4; x++) {
1334  vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1335  ref, &bmv[4*y + x],
1336  4*x + x_off, 4*y + y_off, 4, 4,
1337  width, height, s->linesize,
1338  s->put_pixels_tab[2]);
1339  }
1340  }
1341 
1342  /* U/V */
1343  x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1344  for (y = 0; y < 2; y++) {
1345  for (x = 0; x < 2; x++) {
1346  uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1347  mb->bmv[ 2*y * 4 + 2*x+1].x +
1348  mb->bmv[(2*y+1) * 4 + 2*x ].x +
1349  mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1350  uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1351  mb->bmv[ 2*y * 4 + 2*x+1].y +
1352  mb->bmv[(2*y+1) * 4 + 2*x ].y +
1353  mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1354  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1355  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1356  if (s->profile == 3) {
1357  uvmv.x &= ~7;
1358  uvmv.y &= ~7;
1359  }
1360  vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1361  dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1362  4*x + x_off, 4*y + y_off, 4, 4,
1363  width, height, s->uvlinesize,
1364  s->put_pixels_tab[2]);
1365  }
1366  }
1367  break;
1368  }
1369  case VP8_SPLITMVMODE_16x8:
1370  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1371  0, 0, 16, 8, width, height, &bmv[0]);
1372  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1373  0, 8, 16, 8, width, height, &bmv[1]);
1374  break;
1375  case VP8_SPLITMVMODE_8x16:
1376  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1377  0, 0, 8, 16, width, height, &bmv[0]);
1378  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1379  8, 0, 8, 16, width, height, &bmv[1]);
1380  break;
1381  case VP8_SPLITMVMODE_8x8:
1382  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383  0, 0, 8, 8, width, height, &bmv[0]);
1384  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385  8, 0, 8, 8, width, height, &bmv[1]);
1386  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387  0, 8, 8, 8, width, height, &bmv[2]);
1388  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389  8, 8, 8, 8, width, height, &bmv[3]);
1390  break;
1391  }
1392 }
1393 
1395  uint8_t *dst[3], VP8Macroblock *mb)
1396 {
1397  int x, y, ch;
1398 
1399  if (mb->mode != MODE_I4x4) {
1400  uint8_t *y_dst = dst[0];
1401  for (y = 0; y < 4; y++) {
1402  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1403  if (nnz4) {
1404  if (nnz4&~0x01010101) {
1405  for (x = 0; x < 4; x++) {
1406  if ((uint8_t)nnz4 == 1)
1407  s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1408  else if((uint8_t)nnz4 > 1)
1409  s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1410  nnz4 >>= 8;
1411  if (!nnz4)
1412  break;
1413  }
1414  } else {
1415  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1416  }
1417  }
1418  y_dst += 4*s->linesize;
1419  }
1420  }
1421 
1422  for (ch = 0; ch < 2; ch++) {
1423  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1424  if (nnz4) {
1425  uint8_t *ch_dst = dst[1+ch];
1426  if (nnz4&~0x01010101) {
1427  for (y = 0; y < 2; y++) {
1428  for (x = 0; x < 2; x++) {
1429  if ((uint8_t)nnz4 == 1)
1430  s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1431  else if((uint8_t)nnz4 > 1)
1432  s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1433  nnz4 >>= 8;
1434  if (!nnz4)
1435  goto chroma_idct_end;
1436  }
1437  ch_dst += 4*s->uvlinesize;
1438  }
1439  } else {
1440  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1441  }
1442  }
1443 chroma_idct_end: ;
1444  }
1445 }
1446 
1448 {
1449  int interior_limit, filter_level;
1450 
1451  if (s->segmentation.enabled) {
1452  filter_level = s->segmentation.filter_level[mb->segment];
1453  if (!s->segmentation.absolute_vals)
1454  filter_level += s->filter.level;
1455  } else
1456  filter_level = s->filter.level;
1457 
1458  if (s->lf_delta.enabled) {
1459  filter_level += s->lf_delta.ref[mb->ref_frame];
1460  filter_level += s->lf_delta.mode[mb->mode];
1461  }
1462 
1463  filter_level = av_clip_uintp2(filter_level, 6);
1464 
1465  interior_limit = filter_level;
1466  if (s->filter.sharpness) {
1467  interior_limit >>= (s->filter.sharpness + 3) >> 2;
1468  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1469  }
1470  interior_limit = FFMAX(interior_limit, 1);
1471 
1472  f->filter_level = filter_level;
1473  f->inner_limit = interior_limit;
1474  f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1475 }
1476 
1477 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1478 {
1479  int mbedge_lim, bedge_lim, hev_thresh;
1480  int filter_level = f->filter_level;
1481  int inner_limit = f->inner_limit;
1482  int inner_filter = f->inner_filter;
1483  int linesize = s->linesize;
1484  int uvlinesize = s->uvlinesize;
1485  static const uint8_t hev_thresh_lut[2][64] = {
1486  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1487  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1488  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1489  3, 3, 3, 3 },
1490  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1491  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1492  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1493  2, 2, 2, 2 }
1494  };
1495 
1496  if (!filter_level)
1497  return;
1498 
1499  bedge_lim = 2*filter_level + inner_limit;
1500  mbedge_lim = bedge_lim + 4;
1501 
1502  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1503 
1504  if (mb_x) {
1505  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1506  mbedge_lim, inner_limit, hev_thresh);
1507  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1508  mbedge_lim, inner_limit, hev_thresh);
1509  }
1510 
1511  if (inner_filter) {
1512  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1513  inner_limit, hev_thresh);
1514  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1515  inner_limit, hev_thresh);
1516  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1517  inner_limit, hev_thresh);
1518  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1519  uvlinesize, bedge_lim,
1520  inner_limit, hev_thresh);
1521  }
1522 
1523  if (mb_y) {
1524  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1525  mbedge_lim, inner_limit, hev_thresh);
1526  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1527  mbedge_lim, inner_limit, hev_thresh);
1528  }
1529 
1530  if (inner_filter) {
1531  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1532  linesize, bedge_lim,
1533  inner_limit, hev_thresh);
1534  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1535  linesize, bedge_lim,
1536  inner_limit, hev_thresh);
1537  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1538  linesize, bedge_lim,
1539  inner_limit, hev_thresh);
1540  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1541  dst[2] + 4 * uvlinesize,
1542  uvlinesize, bedge_lim,
1543  inner_limit, hev_thresh);
1544  }
1545 }
1546 
1547 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1548 {
1549  int mbedge_lim, bedge_lim;
1550  int filter_level = f->filter_level;
1551  int inner_limit = f->inner_limit;
1552  int inner_filter = f->inner_filter;
1553  int linesize = s->linesize;
1554 
1555  if (!filter_level)
1556  return;
1557 
1558  bedge_lim = 2*filter_level + inner_limit;
1559  mbedge_lim = bedge_lim + 4;
1560 
1561  if (mb_x)
1562  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1563  if (inner_filter) {
1564  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1565  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1566  s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1567  }
1568 
1569  if (mb_y)
1570  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1571  if (inner_filter) {
1572  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1573  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1574  s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1575  }
1576 }
1577 
1578 #define MARGIN (16 << 2)
1579 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1580  VP8Frame *prev_frame)
1581 {
1582  VP8Context *s = avctx->priv_data;
1583  int mb_x, mb_y;
1584 
1585  s->mv_min.y = -MARGIN;
1586  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1587  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1588  VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1589  int mb_xy = mb_y*s->mb_width;
1590 
1591  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1592 
1593  s->mv_min.x = -MARGIN;
1594  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1595  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1596  if (mb_y == 0)
1597  AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1598  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1599  prev_frame && prev_frame->seg_map ?
1600  prev_frame->seg_map->data + mb_xy : NULL, 1);
1601  s->mv_min.x -= 64;
1602  s->mv_max.x -= 64;
1603  }
1604  s->mv_min.y -= 64;
1605  s->mv_max.y -= 64;
1606  }
1607 }
1608 
/*
 * Row synchronisation helpers for sliced threading.
 *
 * A position is packed as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * check_thread_pos(td, otd, x, y): block until the other thread `otd` has
 *   published a position >= (y, x). While waiting, td->wait_mb_pos holds
 *   the awaited position; it is reset to INT_MAX afterwards.
 * update_pos(td, y, x): publish (y, x) as td's position and, under sliced
 *   threading with more than one job, wake waiters on td->cond when a
 *   neighbouring thread may be waiting for it. This macro reads `avctx`,
 *   `num_jobs`, `next_td` and `prev_td` from the calling scope.
 *
 * Both expand to a single statement without a trailing semicolon, so they
 * are safe in unbraced if/else; arguments are parenthesized.
 */
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
1645 
1646 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1647  int jobnr, int threadnr)
1648 {
1649  VP8Context *s = avctx->priv_data;
1650  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1651  int mb_y = td->thread_mb_pos>>16;
1652  int mb_x, mb_xy = mb_y*s->mb_width;
1653  int num_jobs = s->num_jobs;
1654  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
1655  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1656  VP8Macroblock *mb;
1657  uint8_t *dst[3] = {
1658  curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1659  curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1660  curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
1661  };
1662  if (mb_y == 0) prev_td = td;
1663  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1664  if (mb_y == s->mb_height-1) next_td = td;
1665  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1666  if (s->mb_layout == 1)
1667  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1668  else {
1669  // Make sure the previous frame has read its segmentation map,
1670  // if we re-use the same map.
1671  if (prev_frame && s->segmentation.enabled &&
1673  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
1674  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1675  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1676  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1677  }
1678 
1679  memset(td->left_nnz, 0, sizeof(td->left_nnz));
1680 
1681  s->mv_min.x = -MARGIN;
1682  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1683 
1684  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1685  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1686  if (prev_td != td) {
1687  if (threadnr != 0) {
1688  check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1689  } else {
1690  check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1691  }
1692  }
1693 
1694  s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1695  s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1696 
1697  if (!s->mb_layout)
1698  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1699  prev_frame && prev_frame->seg_map ?
1700  prev_frame->seg_map->data + mb_xy : NULL, 0);
1701 
1702  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1703 
1704  if (!mb->skip)
1705  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1706 
1707  if (mb->mode <= MODE_I4x4)
1708  intra_predict(s, td, dst, mb, mb_x, mb_y);
1709  else
1710  inter_predict(s, td, dst, mb, mb_x, mb_y);
1711 
1712  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1713 
1714  if (!mb->skip) {
1715  idct_mb(s, td, dst, mb);
1716  } else {
1717  AV_ZERO64(td->left_nnz);
1718  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1719 
1720  // Reset DC block predictors if they would exist if the mb had coefficients
1721  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1722  td->left_nnz[8] = 0;
1723  s->top_nnz[mb_x][8] = 0;
1724  }
1725  }
1726 
1727  if (s->deblock_filter)
1728  filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1729 
1730  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1731  if (s->filter.simple)
1732  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1733  else
1734  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1735  }
1736 
1737  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1738 
1739  dst[0] += 16;
1740  dst[1] += 8;
1741  dst[2] += 8;
1742  s->mv_min.x -= 64;
1743  s->mv_max.x -= 64;
1744 
1745  if (mb_x == s->mb_width+1) {
1746  update_pos(td, mb_y, s->mb_width+3);
1747  } else {
1748  update_pos(td, mb_y, mb_x);
1749  }
1750  }
1751 }
1752 
1753 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1754  int jobnr, int threadnr)
1755 {
1756  VP8Context *s = avctx->priv_data;
1757  VP8ThreadData *td = &s->thread_data[threadnr];
1758  int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1759  AVFrame *curframe = s->curframe->tf.f;
1760  VP8Macroblock *mb;
1761  VP8ThreadData *prev_td, *next_td;
1762  uint8_t *dst[3] = {
1763  curframe->data[0] + 16*mb_y*s->linesize,
1764  curframe->data[1] + 8*mb_y*s->uvlinesize,
1765  curframe->data[2] + 8*mb_y*s->uvlinesize
1766  };
1767 
1768  if (s->mb_layout == 1)
1769  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1770  else
1771  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1772 
1773  if (mb_y == 0) prev_td = td;
1774  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1775  if (mb_y == s->mb_height-1) next_td = td;
1776  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1777 
1778  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1779  VP8FilterStrength *f = &td->filter_strength[mb_x];
1780  if (prev_td != td) {
1781  check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1782  }
1783  if (next_td != td)
1784  if (next_td != &s->thread_data[0]) {
1785  check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1786  }
1787 
1788  if (num_jobs == 1) {
1789  if (s->filter.simple)
1790  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1791  else
1792  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1793  }
1794 
1795  if (s->filter.simple)
1796  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1797  else
1798  filter_mb(s, dst, f, mb_x, mb_y);
1799  dst[0] += 16;
1800  dst[1] += 8;
1801  dst[2] += 8;
1802 
1803  update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1804  }
1805 }
1806 
1807 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1808  int jobnr, int threadnr)
1809 {
1810  VP8Context *s = avctx->priv_data;
1811  VP8ThreadData *td = &s->thread_data[jobnr];
1812  VP8ThreadData *next_td = NULL, *prev_td = NULL;
1813  VP8Frame *curframe = s->curframe;
1814  int mb_y, num_jobs = s->num_jobs;
1815  td->thread_nr = threadnr;
1816  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1817  if (mb_y >= s->mb_height) break;
1818  td->thread_mb_pos = mb_y<<16;
1819  vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1820  if (s->deblock_filter)
1821  vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1822  update_pos(td, mb_y, INT_MAX & 0xFFFF);
1823 
1824  s->mv_min.y -= 64;
1825  s->mv_max.y -= 64;
1826 
1827  if (avctx->active_thread_type == FF_THREAD_FRAME)
1828  ff_thread_report_progress(&curframe->tf, mb_y, 0);
1829  }
1830 
1831  return 0;
1832 }
1833 
1834 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1835  AVPacket *avpkt)
1836 {
1837  VP8Context *s = avctx->priv_data;
1838  int ret, i, referenced, num_jobs;
1839  enum AVDiscard skip_thresh;
1840  VP8Frame *av_uninit(curframe), *prev_frame;
1841 
1842  if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1843  goto err;
1844 
1845  prev_frame = s->framep[VP56_FRAME_CURRENT];
1846 
1847  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1849 
1850  skip_thresh = !referenced ? AVDISCARD_NONREF :
1852 
1853  if (avctx->skip_frame >= skip_thresh) {
1854  s->invisible = 1;
1855  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1856  goto skip_decode;
1857  }
1858  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1859 
1860  // release no longer referenced frames
1861  for (i = 0; i < 5; i++)
1862  if (s->frames[i].tf.f->data[0] &&
1863  &s->frames[i] != prev_frame &&
1864  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1865  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1866  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1867  vp8_release_frame(s, &s->frames[i]);
1868 
1869  // find a free buffer
1870  for (i = 0; i < 5; i++)
1871  if (&s->frames[i] != prev_frame &&
1872  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1873  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1874  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1875  curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1876  break;
1877  }
1878  if (i == 5) {
1879  av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1880  abort();
1881  }
1882  if (curframe->tf.f->data[0])
1883  vp8_release_frame(s, curframe);
1884 
1885  // Given that arithmetic probabilities are updated every frame, it's quite likely
1886  // that the values we have on a random interframe are complete junk if we didn't
1887  // start decode on a keyframe. So just don't display anything rather than junk.
1888  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1889  !s->framep[VP56_FRAME_GOLDEN] ||
1890  !s->framep[VP56_FRAME_GOLDEN2])) {
1891  av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1892  ret = AVERROR_INVALIDDATA;
1893  goto err;
1894  }
1895 
1896  curframe->tf.f->key_frame = s->keyframe;
1897  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1898  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1899  goto err;
1900 
1901  // check if golden and altref are swapped
1902  if (s->update_altref != VP56_FRAME_NONE) {
1904  } else {
1906  }
1907  if (s->update_golden != VP56_FRAME_NONE) {
1909  } else {
1911  }
1912  if (s->update_last) {
1913  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1914  } else {
1916  }
1917  s->next_framep[VP56_FRAME_CURRENT] = curframe;
1918 
1919  ff_thread_finish_setup(avctx);
1920 
1921  s->linesize = curframe->tf.f->linesize[0];
1922  s->uvlinesize = curframe->tf.f->linesize[1];
1923 
1924  memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1925  /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1926  if (!s->mb_layout)
1927  memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1928  if (!s->mb_layout && s->keyframe)
1929  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1930 
1931  memset(s->ref_count, 0, sizeof(s->ref_count));
1932 
1933 
1934  if (s->mb_layout == 1) {
1935  // Make sure the previous frame has read its segmentation map,
1936  // if we re-use the same map.
1937  if (prev_frame && s->segmentation.enabled &&
1939  ff_thread_await_progress(&prev_frame->tf, 1, 0);
1940  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1941  }
1942 
1943  if (avctx->active_thread_type == FF_THREAD_FRAME)
1944  num_jobs = 1;
1945  else
1946  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1947  s->num_jobs = num_jobs;
1948  s->curframe = curframe;
1949  s->prev_frame = prev_frame;
1950  s->mv_min.y = -MARGIN;
1951  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1952  for (i = 0; i < MAX_THREADS; i++) {
1953  s->thread_data[i].thread_mb_pos = 0;
1954  s->thread_data[i].wait_mb_pos = INT_MAX;
1955  }
1956  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1957 
1958  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1959  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1960 
1961 skip_decode:
1962  // if future frames don't use the updated probabilities,
1963  // reset them to the values we saved
1964  if (!s->update_probabilities)
1965  s->prob[0] = s->prob[1];
1966 
1967  if (!s->invisible) {
1968  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
1969  return ret;
1970  *got_frame = 1;
1971  }
1972 
1973  return avpkt->size;
1974 err:
1975  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1976  return ret;
1977 }
1978 
1980 {
      /*
       * Decoder teardown: flush the decoder, then release every AVFrame held
       * in s->frames. Always returns 0.
       *
       * NOTE(review): the signature line (listing line 1979) is absent from
       * this extract; the body only shows that a codec context named `avctx`
       * is in scope. Presumably this is ff_vp8_decode_free() - confirm.
       */
1981  VP8Context *s = avctx->priv_data;
1982  int i;
1983 
      /* NOTE(review): the meaning of the second argument (1) is defined by
       * vp8_decode_flush_impl(), which is not visible here - confirm. */
1984  vp8_decode_flush_impl(avctx, 1);
      /* av_frame_free() receives the address of each slot's tf.f pointer. */
1985  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
1986  av_frame_free(&s->frames[i].tf.f);
1987 
1988  return 0;
1989 }
1990 
1992 {
      /*
       * Allocate one AVFrame for every slot of s->frames.
       *
       * Returns 0 when all allocations succeed, or AVERROR(ENOMEM) as soon as
       * one av_frame_alloc() call fails. Frames allocated before the failure
       * are not released here; that is left to the caller.
       *
       * NOTE(review): the signature line (listing line 1991) is absent from
       * this extract; the body reads a VP8 context through `s`.
       */
1993  int i;
1994  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1995  s->frames[i].tf.f = av_frame_alloc();
1996  if (!s->frames[i].tf.f)
1997  return AVERROR(ENOMEM);
1998  }
1999  return 0;
2000 }
2001 
2003 {
      /*
       * Decoder initialisation: record the codec context, fix the output
       * pixel format, initialise the DSP helpers and allocate the frame pool.
       *
       * Returns 0 on success. If frame allocation fails, the decoder is torn
       * down through ff_vp8_decode_free() and the negative error code from
       * vp8_init_frames() is returned.
       *
       * NOTE(review): the signature line (listing line 2002) and listing
       * line 2012 are absent from this extract. Presumably this is
       * ff_vp8_decode_init() - confirm.
       */
2004  VP8Context *s = avctx->priv_data;
2005  int ret;
2006 
2007  s->avctx = avctx;
2008  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
      /* NOTE(review): presumably enables the per-frame progress tracking used
       * by ff_thread_report_progress()/ff_thread_await_progress() - confirm. */
2009  avctx->internal->allocate_progress = 1;
2010 
      /* NOTE(review): the literal 8 is presumably the sample bit depth -
       * confirm against ff_videodsp_init(). */
2011  ff_videodsp_init(&s->vdsp, 8);
2013  ff_vp8dsp_init(&s->vp8dsp, 0);
2014 
      /* On failure, release whatever vp8_init_frames() managed to allocate. */
2015  if ((ret = vp8_init_frames(s)) < 0) {
2016  ff_vp8_decode_free(avctx);
2017  return ret;
2018  }
2019 
2020  return 0;
2021 }
2022 
2024 {
      /*
       * Initialise a context by recording the codec context pointer and
       * allocating its own set of AVFrames.
       *
       * Returns 0 on success. If frame allocation fails, the context is torn
       * down through ff_vp8_decode_free() and the negative error code from
       * vp8_init_frames() is returned.
       *
       * NOTE(review): the signature line (listing line 2023) is absent from
       * this extract. Presumably this is the per-thread copy initialiser
       * (vp8_decode_init_thread_copy) - confirm.
       */
2025  VP8Context *s = avctx->priv_data;
2026  int ret;
2027 
2028  s->avctx = avctx;
2029 
2030  if ((ret = vp8_init_frames(s)) < 0) {
2031  ff_vp8_decode_free(avctx);
2032  return ret;
2033  }
2034 
2035  return 0;
2036 }
2037 
/*
 * Translate a frame pointer that points into s_src->frames[] into a pointer
 * to the element with the same index in s->frames[]. A NULL pointer stays
 * NULL.
 *
 * The macro expands in place and relies on variables named `s_src` and `s`
 * being in scope at the point of use. `pic` is evaluated twice, so it must
 * be free of side effects. The parameter and the whole expansion are
 * parenthesized so that any expression is accepted as the argument and the
 * result composes safely with surrounding operators.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2040 
2042 {
      /*
       * Copy decoder state from the context of `src` into the context of
       * `dst`: probabilities, segmentation, loop-filter deltas, sign biases,
       * references to the source's frames, and the frame pointer table.
       *
       * Returns 0 on success, or the negative error code of the first
       * vp8_ref_frame() call that fails.
       *
       * NOTE(review): the signature line (listing line 2041) is absent from
       * this extract. Presumably this is the frame-threading context update
       * callback (vp8_decode_update_thread_context) - confirm.
       */
2043  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2044  int i;
2045 
      /* If this context already owns buffers sized for different macroblock
       * dimensions, drop them and adopt the source's dimensions. */
2046  if (s->macroblocks_base &&
2047  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2048  free_buffers(s);
2049  s->mb_width = s_src->mb_width;
2050  s->mb_height = s_src->mb_height;
2051  }
2052 
      /* Take prob[0] from the source when its update_probabilities flag is
       * non-zero, otherwise take prob[1]. */
2053  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2054  s->segmentation = s_src->segmentation;
2055  s->lf_delta = s_src->lf_delta;
2056  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2057 
      /* Reference every source frame that has picture data (data[0] set)
       * into the slot with the same index in this context. */
2058  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2059  if (s_src->frames[i].tf.f->data[0]) {
2060  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2061  if (ret < 0)
2062  return ret;
2063  }
2064  }
2065 
      /* The source's next_framep[] entries point into its own frames[] array;
       * REBASE maps each one onto the same index of this context's array. */
2066  s->framep[0] = REBASE(s_src->next_framep[0]);
2067  s->framep[1] = REBASE(s_src->next_framep[1]);
2068  s->framep[2] = REBASE(s_src->next_framep[2]);
2069  s->framep[3] = REBASE(s_src->next_framep[3]);
2070 
2071  return 0;
2072 }
2073 
     /*
      * Designated initializers of the codec descriptor for the VP8 decoder.
      *
      * NOTE(review): the declarator line (listing line 2074) and listing
      * lines 2080-2086 are absent from this extract, so only the fields
      * below are visible here. Presumably this is `AVCodec ff_vp8_decoder`
      * - confirm.
      */
2075  .name = "vp8",
2076  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2077  .type = AVMEDIA_TYPE_VIDEO,
2078  .id = AV_CODEC_ID_VP8,
     /* Size of the private context referenced as avctx->priv_data. */
2079  .priv_data_size = sizeof(VP8Context),
2087 };
2088