vp8.c
1 /*
2  * VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Jason Garrett-Glaser
7  * Copyright (C) 2012 Daniel Kang
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37 
38 static void free_buffers(VP8Context *s)
39 {
40  int i;
41  if (s->thread_data)
42  for (i = 0; i < MAX_THREADS; i++) {
43 #if HAVE_THREADS
44  pthread_cond_destroy(&s->thread_data[i].cond);
45  pthread_mutex_destroy(&s->thread_data[i].lock);
46 #endif
47  av_freep(&s->thread_data[i].filter_strength);
48  }
49  av_freep(&s->thread_data);
50  av_freep(&s->macroblocks_base);
51  av_freep(&s->intra4x4_pred_mode_top);
52  av_freep(&s->top_nnz);
53  av_freep(&s->top_border);
54 
55  s->macroblocks = NULL;
56 }
57 
58 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
59 {
60  int ret;
61  if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
62  ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
63  return ret;
64  if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
65  ff_thread_release_buffer(s->avctx, &f->tf);
66  return AVERROR(ENOMEM);
67  }
68  return 0;
69 }
70 
71 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
72 {
73  av_buffer_unref(&f->seg_map);
74  ff_thread_release_buffer(s->avctx, &f->tf);
75 }
76 
77 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
78 {
79  int ret;
80 
81  vp8_release_frame(s, dst);
82 
83  if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
84  return ret;
85  if (src->seg_map &&
86  !(dst->seg_map = av_buffer_ref(src->seg_map))) {
87  vp8_release_frame(s, dst);
88  return AVERROR(ENOMEM);
89  }
90 
91  return 0;
92 }
93 
94 
95 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
96 {
97  VP8Context *s = avctx->priv_data;
98  int i;
99 
100  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
101  vp8_release_frame(s, &s->frames[i]);
102  memset(s->framep, 0, sizeof(s->framep));
103 
104  if (free_mem)
105  free_buffers(s);
106 }
107 
108 static void vp8_decode_flush(AVCodecContext *avctx)
109 {
110  vp8_decode_flush_impl(avctx, 0);
111 }
112 
113 static int update_dimensions(VP8Context *s, int width, int height)
114 {
115  AVCodecContext *avctx = s->avctx;
116  int i;
117 
118  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
119  height != s->avctx->height) {
120  if (av_image_check_size(width, height, 0, s->avctx))
121  return AVERROR_INVALIDDATA;
122 
123  vp8_decode_flush_impl(s->avctx, 1);
124 
125  avcodec_set_dimensions(s->avctx, width, height);
126  }
127 
128  s->mb_width = (s->avctx->coded_width +15) / 16;
129  s->mb_height = (s->avctx->coded_height+15) / 16;
130 
131  s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
132  if (!s->mb_layout) { // Frame threading and one thread
133  s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
134  s->intra4x4_pred_mode_top = av_mallocz(4*s->mb_width);
135  }
136  else // Sliced threading
137  s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
138  s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
139  s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
140  s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
141 
142  for (i = 0; i < MAX_THREADS; i++) {
143  s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
144 #if HAVE_THREADS
145  pthread_mutex_init(&s->thread_data[i].lock, NULL);
146  pthread_cond_init(&s->thread_data[i].cond, NULL);
147 #endif
148  }
149 
150  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
151  (!s->intra4x4_pred_mode_top && !s->mb_layout))
152  return AVERROR(ENOMEM);
153 
154  s->macroblocks = s->macroblocks_base + 1;
155 
156  return 0;
157 }
158 
159 static void parse_segment_info(VP8Context *s)
160 {
161  VP56RangeCoder *c = &s->c;
162  int i;
163 
164  s->segmentation.update_map = vp8_rac_get(c);
165 
166  if (vp8_rac_get(c)) { // update segment feature data
167  s->segmentation.absolute_vals = vp8_rac_get(c);
168 
169  for (i = 0; i < 4; i++)
170  s->segmentation.base_quant[i]  = vp8_rac_get_sint(c, 7);
171 
172  for (i = 0; i < 4; i++)
173  s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
174  }
175  if (s->segmentation.update_map)
176  for (i = 0; i < 3; i++)
177  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
178 }
179 
180 static void update_lf_deltas(VP8Context *s)
181 {
182  VP56RangeCoder *c = &s->c;
183  int i;
184 
185  for (i = 0; i < 4; i++) {
186  if (vp8_rac_get(c)) {
187  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
188 
189  if (vp8_rac_get(c))
190  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
191  }
192  }
193 
194  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
195  if (vp8_rac_get(c)) {
196  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
197 
198  if (vp8_rac_get(c))
199  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
200  }
201  }
202 }
203 
204 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
205 {
206  const uint8_t *sizes = buf;
207  int i;
208 
209  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
210 
211  buf += 3*(s->num_coeff_partitions-1);
212  buf_size -= 3*(s->num_coeff_partitions-1);
213  if (buf_size < 0)
214  return -1;
215 
216  for (i = 0; i < s->num_coeff_partitions-1; i++) {
217  int size = AV_RL24(sizes + 3*i);
218  if (buf_size - size < 0)
219  return -1;
220 
221  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
222  buf += size;
223  buf_size -= size;
224  }
225  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
226 
227  return 0;
228 }
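/* Recap of the layout parsed above: a 2-bit field selects 1, 2, 4 or 8
 * coefficient partitions; the byte sizes of all but the last partition are
 * stored up front as 24-bit little-endian values, and the final partition
 * simply takes whatever remains of the buffer. */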
229 
230 static void get_quants(VP8Context *s)
231 {
232  VP56RangeCoder *c = &s->c;
233  int i, base_qi;
234 
235  int yac_qi = vp8_rac_get_uint(c, 7);
236  int ydc_delta = vp8_rac_get_sint(c, 4);
237  int y2dc_delta = vp8_rac_get_sint(c, 4);
238  int y2ac_delta = vp8_rac_get_sint(c, 4);
239  int uvdc_delta = vp8_rac_get_sint(c, 4);
240  int uvac_delta = vp8_rac_get_sint(c, 4);
241 
242  for (i = 0; i < 4; i++) {
243  if (s->segmentation.enabled) {
244  base_qi = s->segmentation.base_quant[i];
245  if (!s->segmentation.absolute_vals)
246  base_qi += yac_qi;
247  } else
248  base_qi = yac_qi;
249 
250  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
251  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
252  s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
253  /* 101581>>16 is equivalent to 155/100 */
254  s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
255  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
256  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
257 
258  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
259  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
260  }
261 }
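/* Worked example for the 155/100 scaling above: with an AC lookup value of
 * 8, (101581 * 8) >> 16 = 812648 >> 16 = 12, matching (8 * 155) / 100 = 12
 * under truncation, since 101581 / 65536 ~= 1.55. The FFMAX() above then
 * keeps luma_dc_qmul[1] from ever dropping below 8. */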
262 
263 /**
264  * Determine which buffers golden and altref should be updated with after this frame.
266  * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
266  *
267  * Intra frames update all 3 references
268  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
269  * If the update (golden|altref) flag is set, it's updated with the current frame
270  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
271  * If the flag is not set, the number read means:
272  * 0: no update
273  * 1: VP56_FRAME_PREVIOUS
274  * 2: update golden with altref, or update altref with golden
275  */
276 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
277 {
278  VP56RangeCoder *c = &s->c;
279 
280  if (update)
281  return VP56_FRAME_CURRENT;
282 
283  switch (vp8_rac_get_uint(c, 2)) {
284  case 1:
285  return VP56_FRAME_PREVIOUS;
286  case 2:
287  return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
288  }
289  return VP56_FRAME_NONE;
290 }
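/* The mapping implemented above, in tabular form:
 *   update flag set           -> VP56_FRAME_CURRENT
 *   flag clear, 2-bit code 0  -> VP56_FRAME_NONE (no update)
 *   flag clear, 2-bit code 1  -> VP56_FRAME_PREVIOUS
 *   flag clear, 2-bit code 2  -> the other reference (golden <-> altref) */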
291 
292 static void update_refs(VP8Context *s)
293 {
294  VP56RangeCoder *c = &s->c;
295 
296  int update_golden = vp8_rac_get(c);
297  int update_altref = vp8_rac_get(c);
298 
299  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
300  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
301 }
302 
303 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
304 {
305  VP56RangeCoder *c = &s->c;
306  int header_size, hscale, vscale, i, j, k, l, m, ret;
307  int width = s->avctx->width;
308  int height = s->avctx->height;
309 
310  s->keyframe = !(buf[0] & 1);
311  s->profile = (buf[0]>>1) & 7;
312  s->invisible = !(buf[0] & 0x10);
313  header_size = AV_RL24(buf) >> 5;
314  buf += 3;
315  buf_size -= 3;
316 
317  if (s->profile > 3)
318  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
319 
320  if (!s->profile)
321  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
322  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
323  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
324 
325  if (header_size > buf_size - 7*s->keyframe) {
326  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
327  return AVERROR_INVALIDDATA;
328  }
329 
330  if (s->keyframe) {
331  if (AV_RL24(buf) != 0x2a019d) {
332  av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
333  return AVERROR_INVALIDDATA;
334  }
335  width = AV_RL16(buf+3) & 0x3fff;
336  height = AV_RL16(buf+5) & 0x3fff;
337  hscale = buf[4] >> 6;
338  vscale = buf[6] >> 6;
339  buf += 7;
340  buf_size -= 7;
341 
342  if (hscale || vscale)
343  avpriv_request_sample(s->avctx, "Upscaling");
344 
345  s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
346  for (i = 0; i < 4; i++)
347  for (j = 0; j < 16; j++)
348  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
349  sizeof(s->prob->token[i][j]));
350  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
351  memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
352  memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
353  memset(&s->segmentation, 0, sizeof(s->segmentation));
354  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
355  }
356 
357  ff_vp56_init_range_decoder(c, buf, header_size);
358  buf += header_size;
359  buf_size -= header_size;
360 
361  if (s->keyframe) {
362  if (vp8_rac_get(c))
363  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364  vp8_rac_get(c); // whether we can skip clamping in dsp functions
365  }
366 
367  if ((s->segmentation.enabled = vp8_rac_get(c)))
368  parse_segment_info(s);
369  else
370  s->segmentation.update_map = 0; // FIXME: move this to some init function?
371 
372  s->filter.simple = vp8_rac_get(c);
373  s->filter.level = vp8_rac_get_uint(c, 6);
374  s->filter.sharpness = vp8_rac_get_uint(c, 3);
375 
376  if ((s->lf_delta.enabled = vp8_rac_get(c)))
377  if (vp8_rac_get(c))
378  update_lf_deltas(s);
379 
380  if (setup_partitions(s, buf, buf_size)) {
381  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382  return AVERROR_INVALIDDATA;
383  }
384 
385  if (!s->macroblocks_base || /* first frame */
386  width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
387  if ((ret = update_dimensions(s, width, height)) < 0)
388  return ret;
389  }
390 
391  get_quants(s);
392 
393  if (!s->keyframe) {
394  update_refs(s);
395  s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
396  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
397  }
398 
399  // if we aren't saving this frame's probabilities for future frames,
400  // make a copy of the current probabilities
401  if (!(s->update_probabilities = vp8_rac_get(c)))
402  s->prob[1] = s->prob[0];
403 
404  s->update_last = s->keyframe || vp8_rac_get(c);
405 
406  for (i = 0; i < 4; i++)
407  for (j = 0; j < 8; j++)
408  for (k = 0; k < 3; k++)
409  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
410  if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411  int prob = vp8_rac_get_uint(c, 8);
412  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
414  }
415 
416  if ((s->mbskip_enabled = vp8_rac_get(c)))
417  s->prob->mbskip = vp8_rac_get_uint(c, 8);
418 
419  if (!s->keyframe) {
420  s->prob->intra = vp8_rac_get_uint(c, 8);
421  s->prob->last = vp8_rac_get_uint(c, 8);
422  s->prob->golden = vp8_rac_get_uint(c, 8);
423 
424  if (vp8_rac_get(c))
425  for (i = 0; i < 4; i++)
426  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
427  if (vp8_rac_get(c))
428  for (i = 0; i < 3; i++)
429  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
430 
431  // 17.2 MV probability update
432  for (i = 0; i < 2; i++)
433  for (j = 0; j < 19; j++)
434  if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
435  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
436  }
437 
438  return 0;
439 }
440 
441 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
442 {
443  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
445 }
446 
447 /**
448  * Motion vector coding, 17.1.
449  */
450 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
451 {
452  int bit, x = 0;
453 
454  if (vp56_rac_get_prob_branchy(c, p[0])) {
455  int i;
456 
457  for (i = 0; i < 3; i++)
458  x += vp56_rac_get_prob(c, p[9 + i]) << i;
459  for (i = 9; i > 3; i--)
460  x += vp56_rac_get_prob(c, p[9 + i]) << i;
461  if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
462  x += 8;
463  } else {
464  // small_mvtree
465  const uint8_t *ps = p+2;
466  bit = vp56_rac_get_prob(c, *ps);
467  ps += 1 + 3*bit;
468  x += 4*bit;
469  bit = vp56_rac_get_prob(c, *ps);
470  ps += 1 + bit;
471  x += 2*bit;
472  x += vp56_rac_get_prob(c, *ps);
473  }
474 
475  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
476 }
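/* Shape of the data decoded above: a component is either "long" (first
 * branch), i.e. ten probability-coded magnitude bits sent low three first
 * and then bits 9..4 high-to-low, with bit 3 forced to 1 when no higher bit
 * is set so the magnitude stays >= 8, or "short" (small_mvtree), a 3-bit
 * value 0..7. A sign bit follows only for nonzero magnitudes. */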
477 
478 static av_always_inline
479 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
480 {
481  if (left == top)
482  return vp8_submv_prob[4-!!left];
483  if (!top)
484  return vp8_submv_prob[2];
485  return vp8_submv_prob[1-!!left];
486 }
487 
488 /**
489  * Split motion vector prediction, 16.4.
490  * @returns the number of motion vectors parsed (2, 4 or 16)
491  */
492 static av_always_inline
493 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
494 {
495  int part_idx;
496  int n, num;
497  VP8Macroblock *top_mb;
498  VP8Macroblock *left_mb = &mb[-1];
499  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
500  *mbsplits_top,
501  *mbsplits_cur, *firstidx;
502  VP56mv *top_mv;
503  VP56mv *left_mv = left_mb->bmv;
504  VP56mv *cur_mv = mb->bmv;
505 
506  if (!layout) // layout is inlined, s->mb_layout is not
507  top_mb = &mb[2];
508  else
509  top_mb = &mb[-s->mb_width-1];
510  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511  top_mv = top_mb->bmv;
512 
513  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515  part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
516  } else {
517  part_idx = VP8_SPLITMVMODE_8x8;
518  }
519  } else {
520  part_idx = VP8_SPLITMVMODE_4x4;
521  }
522 
523  num = vp8_mbsplit_count[part_idx];
524  mbsplits_cur = vp8_mbsplits[part_idx],
525  firstidx = vp8_mbfirstidx[part_idx];
526  mb->partitioning = part_idx;
527 
528  for (n = 0; n < num; n++) {
529  int k = firstidx[n];
530  uint32_t left, above;
531  const uint8_t *submv_prob;
532 
533  if (!(k & 3))
534  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
535  else
536  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
537  if (k <= 3)
538  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
539  else
540  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
541 
542  submv_prob = get_submv_prob(left, above);
543 
544  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547  mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548  mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
549  } else {
550  AV_ZERO32(&mb->bmv[n]);
551  }
552  } else {
553  AV_WN32A(&mb->bmv[n], above);
554  }
555  } else {
556  AV_WN32A(&mb->bmv[n], left);
557  }
558  }
559 
560  return num;
561 }
562 
563 static av_always_inline
564 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
565 {
566  VP8Macroblock *mb_edge[3] = { 0 /* top */,
567  mb - 1 /* left */,
568  0 /* top-left */ };
569  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
570  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
571  int idx = CNT_ZERO;
572  int cur_sign_bias = s->sign_bias[mb->ref_frame];
573  int8_t *sign_bias = s->sign_bias;
574  VP56mv near_mv[4];
575  uint8_t cnt[4] = { 0 };
576  VP56RangeCoder *c = &s->c;
577 
578  if (!layout) { // layout is inlined (s->mb_layout is not)
579  mb_edge[0] = mb + 2;
580  mb_edge[2] = mb + 1;
581  }
582  else {
583  mb_edge[0] = mb - s->mb_width-1;
584  mb_edge[2] = mb - s->mb_width-2;
585  }
586 
587  AV_ZERO32(&near_mv[0]);
588  AV_ZERO32(&near_mv[1]);
589  AV_ZERO32(&near_mv[2]);
590 
591  /* Process MB on top, left and top-left */
592  #define MV_EDGE_CHECK(n)\
593  {\
594  VP8Macroblock *edge = mb_edge[n];\
595  int edge_ref = edge->ref_frame;\
596  if (edge_ref != VP56_FRAME_CURRENT) {\
597  uint32_t mv = AV_RN32A(&edge->mv);\
598  if (mv) {\
599  if (cur_sign_bias != sign_bias[edge_ref]) {\
600  /* SWAR negate of the values in mv. */\
601  mv = ~mv;\
602  mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
603  }\
604  if (!n || mv != AV_RN32A(&near_mv[idx]))\
605  AV_WN32A(&near_mv[++idx], mv);\
606  cnt[idx] += 1 + (n != 2);\
607  } else\
608  cnt[CNT_ZERO] += 1 + (n != 2);\
609  }\
610  }
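/* Note on the SWAR negate above: both int16 halves of mv are negated in a
 * single 32-bit operation without a carry crossing lanes. After ~mv, each
 * half is incremented through its low 15 bits only and the saved sign bits
 * are XORed back in; e.g. a half holding 3: ~0x0003 = 0xFFFC, then
 * (0x7FFC + 1) ^ 0x8000 = 0xFFFD = -3. */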
611 
612  MV_EDGE_CHECK(0)
613  MV_EDGE_CHECK(1)
614  MV_EDGE_CHECK(2)
615 
616  mb->partitioning = VP8_SPLITMVMODE_NONE;
617  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618  mb->mode = VP8_MVMODE_MV;
619 
620  /* If we have three distinct MVs, merge first and last if they're the same */
621  if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
622  cnt[CNT_NEAREST] += 1;
623 
624  /* Swap near and nearest if necessary */
625  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
628  }
629 
630  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
632 
633  /* Choose the best mv out of 0,0 and the nearest mv */
634  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
635  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
636  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
637  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
638 
639  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640  mb->mode = VP8_MVMODE_SPLIT;
641  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
642  } else {
643  mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644  mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
645  mb->bmv[0] = mb->mv;
646  }
647  } else {
648  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
649  mb->bmv[0] = mb->mv;
650  }
651  } else {
652  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
653  mb->bmv[0] = mb->mv;
654  }
655  } else {
656  mb->mode = VP8_MVMODE_ZERO;
657  AV_ZERO32(&mb->mv);
658  mb->bmv[0] = mb->mv;
659  }
660 }
661 
662 static av_always_inline
663 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
664  int mb_x, int keyframe, int layout)
665 {
666  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
667 
668  if (layout == 1) {
669  VP8Macroblock *mb_top = mb - s->mb_width - 1;
670  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
671  }
672  if (keyframe) {
673  int x, y;
674  uint8_t* top;
675  uint8_t* const left = s->intra4x4_pred_mode_left;
676  if (layout == 1)
677  top = mb->intra4x4_pred_mode_top;
678  else
679  top = s->intra4x4_pred_mode_top + 4 * mb_x;
680  for (y = 0; y < 4; y++) {
681  for (x = 0; x < 4; x++) {
682  const uint8_t *ctx;
683  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685  left[y] = top[x] = *intra4x4;
686  intra4x4++;
687  }
688  }
689  } else {
690  int i;
691  for (i = 0; i < 16; i++)
692  intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
693  }
694 }
695 
696 static av_always_inline
697 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698  uint8_t *segment, uint8_t *ref, int layout)
699 {
700  VP56RangeCoder *c = &s->c;
701 
702  if (s->segmentation.update_map) {
703  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
704  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
705  } else if (s->segmentation.enabled)
706  *segment = ref ? *ref : *segment;
707  mb->segment = *segment;
708 
709  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
710 
711  if (s->keyframe) {
712  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
713 
714  if (mb->mode == MODE_I4x4) {
715  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
716  } else {
717  const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
718  if (s->mb_layout == 1)
719  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
720  else
721  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
722  AV_WN32A( s->intra4x4_pred_mode_left, modes);
723  }
724 
725  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
726  mb->ref_frame = VP56_FRAME_CURRENT;
727  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
728  // inter MB, 16.2
729  if (vp56_rac_get_prob_branchy(c, s->prob->last))
730  mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
731  VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
732  else
733  mb->ref_frame = VP56_FRAME_PREVIOUS;
734  s->ref_count[mb->ref_frame-1]++;
735 
736  // motion vectors, 16.3
737  decode_mvs(s, mb, mb_x, mb_y, layout);
738  } else {
739  // intra MB, 16.1
740  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
741 
742  if (mb->mode == MODE_I4x4)
743  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
744 
745  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
746  mb->ref_frame = VP56_FRAME_CURRENT;
747  mb->partitioning = VP8_SPLITMVMODE_NONE;
748  AV_ZERO32(&mb->bmv[0]);
749  }
750 }
751 
752 #ifndef decode_block_coeffs_internal
753 /**
754  * @param r arithmetic bitstream reader context
755  * @param block destination for block coefficients
756  * @param probs probabilities to use when reading trees from the bitstream
757  * @param i initial coeff index, 0 unless a separate DC block is coded
758  * @param qmul array holding the dc/ac dequant factor at position 0/1
759  * @return 0 if no coeffs were decoded
760  * otherwise, the index of the last coeff decoded plus one
761  */
762 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
763  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
764  int i, uint8_t *token_prob, int16_t qmul[2])
765 {
766  VP56RangeCoder c = *r;
767  goto skip_eob;
768  do {
769  int coeff;
770  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
771  break;
772 
773 skip_eob:
774  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775  if (++i == 16)
776  break; // invalid input; blocks should end with EOB
777  token_prob = probs[i][0];
778  goto skip_eob;
779  }
780 
781  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782  coeff = 1;
783  token_prob = probs[i+1][1];
784  } else {
785  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
786  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787  if (coeff)
788  coeff += vp56_rac_get_prob(&c, token_prob[5]);
789  coeff += 2;
790  } else {
791  // DCT_CAT*
792  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
793  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
794  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
795  } else { // DCT_CAT2
796  coeff = 7;
797  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
798  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799  }
800  } else { // DCT_CAT3 and up
801  int a = vp56_rac_get_prob(&c, token_prob[8]);
802  int b = vp56_rac_get_prob(&c, token_prob[9+a]);
803  int cat = (a<<1) + b;
804  coeff = 3 + (8<<cat);
805  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
806  }
807  }
808  token_prob = probs[i+1][2];
809  }
810  block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
811  } while (++i < 16);
812 
813  *r = c;
814  return i;
815 }
816 #endif
817 
818 /**
819  * @param c arithmetic bitstream reader context
820  * @param block destination for block coefficients
821  * @param probs probabilities to use when reading trees from the bitstream
822  * @param i initial coeff index, 0 unless a separate DC block is coded
823  * @param zero_nhood the initial prediction context for number of surrounding
824  * all-zero blocks (only left/top, so 0-2)
825  * @param qmul array holding the dc/ac dequant factor at position 0/1
826  * @return 0 if no coeffs were decoded
827  * otherwise, the index of the last coeff decoded plus one
828  */
829 static av_always_inline
830 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
831  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
832  int i, int zero_nhood, int16_t qmul[2])
833 {
834  uint8_t *token_prob = probs[i][zero_nhood];
835  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
836  return 0;
837  return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
838 }
839 
840 static av_always_inline
841 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
842  uint8_t t_nnz[9], uint8_t l_nnz[9])
843 {
844  int i, x, y, luma_start = 0, luma_ctx = 3;
845  int nnz_pred, nnz, nnz_total = 0;
846  int segment = mb->segment;
847  int block_dc = 0;
848 
849  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
850  nnz_pred = t_nnz[8] + l_nnz[8];
851 
852  // decode DC values and do hadamard
853  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
854  s->qmat[segment].luma_dc_qmul);
855  l_nnz[8] = t_nnz[8] = !!nnz;
856  if (nnz) {
857  nnz_total += nnz;
858  block_dc = 1;
859  if (nnz == 1)
860  s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861  else
862  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
863  }
864  luma_start = 1;
865  luma_ctx = 0;
866  }
867 
868  // luma blocks
869  for (y = 0; y < 4; y++)
870  for (x = 0; x < 4; x++) {
871  nnz_pred = l_nnz[y] + t_nnz[x];
872  nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
873  nnz_pred, s->qmat[segment].luma_qmul);
874  // nnz+block_dc may be one more than the actual last index, but we don't care
875  td->non_zero_count_cache[y][x] = nnz + block_dc;
876  t_nnz[x] = l_nnz[y] = !!nnz;
877  nnz_total += nnz;
878  }
879 
880  // chroma blocks
881  // TODO: what to do about dimensions? 2nd dim for luma is x,
882  // but for chroma it's (y<<1)|x
883  for (i = 4; i < 6; i++)
884  for (y = 0; y < 2; y++)
885  for (x = 0; x < 2; x++) {
886  nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
887  nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
888  nnz_pred, s->qmat[segment].chroma_qmul);
889  td->non_zero_count_cache[i][(y<<1)+x] = nnz;
890  t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
891  nnz_total += nnz;
892  }
893 
894  // if there were no coded coeffs despite the macroblock not being marked skip,
895  // we MUST not do the inner loop filter and should not do IDCT
896  // Since skip isn't used for bitstream prediction, just manually set it.
897  if (!nnz_total)
898  mb->skip = 1;
899 }
900 
901 static av_always_inline
902 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
903  int linesize, int uvlinesize, int simple)
904 {
905  AV_COPY128(top_border, src_y + 15*linesize);
906  if (!simple) {
907  AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
908  AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
909  }
910 }
911 
912 static av_always_inline
913 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
914  int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
915  int simple, int xchg)
916 {
917  uint8_t *top_border_m1 = top_border-32; // for TL prediction
918  src_y -= linesize;
919  src_cb -= uvlinesize;
920  src_cr -= uvlinesize;
921 
922 #define XCHG(a,b,xchg) do { \
923  if (xchg) AV_SWAP64(b,a); \
924  else AV_COPY64(b,a); \
925  } while (0)
926 
927  XCHG(top_border_m1+8, src_y-8, xchg);
928  XCHG(top_border, src_y, xchg);
929  XCHG(top_border+8, src_y+8, 1);
930  if (mb_x < mb_width-1)
931  XCHG(top_border+32, src_y+16, 1);
932 
933  // only copy chroma for normal loop filter
934  // or to initialize the top row to 127
935  if (!simple || !mb_y) {
936  XCHG(top_border_m1+16, src_cb-8, xchg);
937  XCHG(top_border_m1+24, src_cr-8, xchg);
938  XCHG(top_border+16, src_cb, 1);
939  XCHG(top_border+24, src_cr, 1);
940  }
941 }
942 
943 static av_always_inline
944 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
945 {
946  if (!mb_x) {
947  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
948  } else {
949  return mb_y ? mode : LEFT_DC_PRED8x8;
950  }
951 }
952 
953 static av_always_inline
954 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
955 {
956  if (!mb_x) {
957  return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
958  } else {
959  return mb_y ? mode : HOR_PRED8x8;
960  }
961 }
962 
963 static av_always_inline
964 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
965 {
966  if (mode == DC_PRED8x8) {
967  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
968  } else {
969  return mode;
970  }
971 }
972 
973 static av_always_inline
974 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
975 {
976  switch (mode) {
977  case DC_PRED8x8:
978  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
979  case VERT_PRED8x8:
980  return !mb_y ? DC_127_PRED8x8 : mode;
981  case HOR_PRED8x8:
982  return !mb_x ? DC_129_PRED8x8 : mode;
983  case PLANE_PRED8x8 /*TM*/:
984  return check_tm_pred8x8_mode(mode, mb_x, mb_y);
985  }
986  return mode;
987 }
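/* Summary of the emu-edge fixups above: VERT with no row above becomes
 * DC_127, HOR with no column to the left becomes DC_129, TM falls back via
 * check_tm_pred8x8_mode() (VERT, HOR or DC_129), and DC picks the
 * TOP/LEFT/128 variants in check_dc_pred8x8_mode(). */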
988 
989 static av_always_inline
990 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
991 {
992  if (!mb_x) {
993  return mb_y ? VERT_VP8_PRED : DC_129_PRED;
994  } else {
995  return mb_y ? mode : HOR_VP8_PRED;
996  }
997 }
998 
999 static av_always_inline
1000 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1001 {
1002  switch (mode) {
1003  case VERT_PRED:
1004  if (!mb_x && mb_y) {
1005  *copy_buf = 1;
1006  return mode;
1007  }
1008  /* fall-through */
1009  case DIAG_DOWN_LEFT_PRED:
1010  case VERT_LEFT_PRED:
1011  return !mb_y ? DC_127_PRED : mode;
1012  case HOR_PRED:
1013  if (!mb_y) {
1014  *copy_buf = 1;
1015  return mode;
1016  }
1017  /* fall-through */
1018  case HOR_UP_PRED:
1019  return !mb_x ? DC_129_PRED : mode;
1020  case TM_VP8_PRED:
1021  return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1022  case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1023  case DIAG_DOWN_RIGHT_PRED:
1024  case VERT_RIGHT_PRED:
1025  case HOR_DOWN_PRED:
1026  if (!mb_y || !mb_x)
1027  *copy_buf = 1;
1028  return mode;
1029  }
1030  return mode;
1031 }
1032 
1033 static av_always_inline
1034 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1035  VP8Macroblock *mb, int mb_x, int mb_y)
1036 {
1037  AVCodecContext *avctx = s->avctx;
1038  int x, y, mode, nnz;
1039  uint32_t tr;
1040 
1041  // for the first row, we need to run xchg_mb_border to init the top edge to 127
1042  // otherwise, skip it if we aren't going to deblock
1043  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1044  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1045  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1046  s->filter.simple, 1);
1047 
1048  if (mb->mode < MODE_I4x4) {
1049  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1050  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1051  } else {
1052  mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1053  }
1054  s->hpc.pred16x16[mode](dst[0], s->linesize);
1055  } else {
1056  uint8_t *ptr = dst[0];
1057  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1058  uint8_t tr_top[4] = { 127, 127, 127, 127 };
1059 
1060  // all blocks on the right edge of the macroblock use the bottom edge of
1061  // the top macroblock for their topright edge
1062  uint8_t *tr_right = ptr - s->linesize + 16;
1063 
1064  // if we're on the right edge of the frame, said edge is extended
1065  // from the top macroblock
1066  if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1067  mb_x == s->mb_width-1) {
1068  tr = tr_right[-1]*0x01010101u;
1069  tr_right = (uint8_t *)&tr;
1070  }
1071 
1072  if (mb->skip)
1073  AV_ZERO128(td->non_zero_count_cache);
1074 
1075  for (y = 0; y < 4; y++) {
1076  uint8_t *topright = ptr + 4 - s->linesize;
1077  for (x = 0; x < 4; x++) {
1078  int copy = 0, linesize = s->linesize;
1079  uint8_t *dst = ptr+4*x;
1080  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1081 
1082  if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1083  topright = tr_top;
1084  } else if (x == 3)
1085  topright = tr_right;
1086 
1087  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1088  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1089  if (copy) {
1090  dst = copy_dst + 12;
1091  linesize = 8;
1092  if (!(mb_y + y)) {
1093  copy_dst[3] = 127U;
1094  AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1095  } else {
1096  AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1097  if (!(mb_x + x)) {
1098  copy_dst[3] = 129U;
1099  } else {
1100  copy_dst[3] = ptr[4*x-s->linesize-1];
1101  }
1102  }
1103  if (!(mb_x + x)) {
1104  copy_dst[11] =
1105  copy_dst[19] =
1106  copy_dst[27] =
1107  copy_dst[35] = 129U;
1108  } else {
1109  copy_dst[11] = ptr[4*x -1];
1110  copy_dst[19] = ptr[4*x+s->linesize -1];
1111  copy_dst[27] = ptr[4*x+s->linesize*2-1];
1112  copy_dst[35] = ptr[4*x+s->linesize*3-1];
1113  }
1114  }
1115  } else {
1116  mode = intra4x4[x];
1117  }
1118  s->hpc.pred4x4[mode](dst, topright, linesize);
1119  if (copy) {
1120  AV_COPY32(ptr+4*x , copy_dst+12);
1121  AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1122  AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1123  AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1124  }
1125 
1126  nnz = td->non_zero_count_cache[y][x];
1127  if (nnz) {
1128  if (nnz == 1)
1129  s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1130  else
1131  s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1132  }
1133  topright += 4;
1134  }
1135 
1136  ptr += 4*s->linesize;
1137  intra4x4 += 4;
1138  }
1139  }
1140 
1141  if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1142  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1143  } else {
1144  mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1145  }
1146  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1147  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1148 
1149  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1150  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1151  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1152  s->filter.simple, 0);
1153 }
1154 
1155 static const uint8_t subpel_idx[3][8] = {
1156  { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1157  // also function pointer index
1158  { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1159  { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1160 };
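/* Reading the table above: row 0 doubles as the MC function index (0 =
 * copy, 1 = the 4-tap filters, 2 = the 6-tap filters in the sixtap set)
 * and as the left/top margin in pixels; rows 1 and 2 give the total and
 * right/bottom margins. So an odd eighth-pel position needs 1 extra pixel
 * left and 2 right (3 total), an even nonzero one needs 2 left and 3 right
 * (5 total), and full-pel needs none. */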
1161 
1162 /**
1163  * luma MC function
1164  *
1165  * @param s VP8 decoding context
1166  * @param dst target buffer for block data at block position
1167  * @param ref reference picture buffer at origin (0, 0)
1168  * @param mv motion vector (relative to block position) to get pixel data from
1169  * @param x_off horizontal position of block from origin (0, 0)
1170  * @param y_off vertical position of block from origin (0, 0)
1171  * @param block_w width of block (16, 8 or 4)
1172  * @param block_h height of block (always same as block_w)
1173  * @param width width of src/dst plane data
1174  * @param height height of src/dst plane data
1175  * @param linesize size of a single line of plane data, including padding
1176  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1177  */
1178 static av_always_inline
1179 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1180  ThreadFrame *ref, const VP56mv *mv,
1181  int x_off, int y_off, int block_w, int block_h,
1182  int width, int height, ptrdiff_t linesize,
1183  vp8_mc_func mc_func[3][3])
1184 {
1185  uint8_t *src = ref->f->data[0];
1186 
1187  if (AV_RN32A(mv)) {
1188  int src_linesize = linesize;
1189  int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1190  int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1191 
1192  x_off += mv->x >> 2;
1193  y_off += mv->y >> 2;
1194 
1195  // edge emulation
1196  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1197  src += y_off * linesize + x_off;
1198  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1199  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1200  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
1201  src - my_idx * linesize - mx_idx, linesize,
1202  block_w + subpel_idx[1][mx],
1203  block_h + subpel_idx[1][my],
1204  x_off - mx_idx, y_off - my_idx, width, height);
1205  src = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1206  src_linesize = 32;
1207  }
1208  mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1209  } else {
1210  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1211  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1212  }
1213 }
1214 
1215 /**
1216  * chroma MC function
1217  *
1218  * @param s VP8 decoding context
1219  * @param dst1 target buffer for block data at block position (U plane)
1220  * @param dst2 target buffer for block data at block position (V plane)
1221  * @param ref reference picture buffer at origin (0, 0)
1222  * @param mv motion vector (relative to block position) to get pixel data from
1223  * @param x_off horizontal position of block from origin (0, 0)
1224  * @param y_off vertical position of block from origin (0, 0)
1225  * @param block_w width of block (16, 8 or 4)
1226  * @param block_h height of block (always same as block_w)
1227  * @param width width of src/dst plane data
1228  * @param height height of src/dst plane data
1229  * @param linesize size of a single line of plane data, including padding
1230  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1231  */
1232 static av_always_inline
1233 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1234  ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1235  int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1236  vp8_mc_func mc_func[3][3])
1237 {
1238  uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1239 
1240  if (AV_RN32A(mv)) {
1241  int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1242  int my = mv->y&7, my_idx = subpel_idx[0][my];
1243 
1244  x_off += mv->x >> 3;
1245  y_off += mv->y >> 3;
1246 
1247  // edge emulation
1248  src1 += y_off * linesize + x_off;
1249  src2 += y_off * linesize + x_off;
1250  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1251  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1252  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1253  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
1254  src1 - my_idx * linesize - mx_idx, linesize,
1255  block_w + subpel_idx[1][mx],
1256  block_h + subpel_idx[1][my],
1257  x_off - mx_idx, y_off - my_idx, width, height);
1258  src1 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1259  mc_func[my_idx][mx_idx](dst1, linesize, src1, 32, block_h, mx, my);
1260 
1261  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
1262  src2 - my_idx * linesize - mx_idx, linesize,
1263  block_w + subpel_idx[1][mx],
1264  block_h + subpel_idx[1][my],
1265  x_off - mx_idx, y_off - my_idx, width, height);
1266  src2 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1267  mc_func[my_idx][mx_idx](dst2, linesize, src2, 32, block_h, mx, my);
1268  } else {
1269  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1270  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1271  }
1272  } else {
1273  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1274  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1275  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1276  }
1277 }
1278 
1279 static av_always_inline
1280 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1281  ThreadFrame *ref_frame, int x_off, int y_off,
1282  int bx_off, int by_off,
1283  int block_w, int block_h,
1284  int width, int height, VP56mv *mv)
1285 {
1286  VP56mv uvmv = *mv;
1287 
1288  /* Y */
1289  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1290  ref_frame, mv, x_off + bx_off, y_off + by_off,
1291  block_w, block_h, width, height, s->linesize,
1292  s->put_pixels_tab[block_w == 8]);
1293 
1294  /* U/V */
1295  if (s->profile == 3) {
1296  uvmv.x &= ~7;
1297  uvmv.y &= ~7;
1298  }
1299  x_off >>= 1; y_off >>= 1;
1300  bx_off >>= 1; by_off >>= 1;
1301  width >>= 1; height >>= 1;
1302  block_w >>= 1; block_h >>= 1;
1303  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1304  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1305  &uvmv, x_off + bx_off, y_off + by_off,
1306  block_w, block_h, width, height, s->uvlinesize,
1307  s->put_pixels_tab[1 + (block_w == 4)]);
1308 }
1309 
1310 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1311  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1312 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1313 {
1314  /* Don't prefetch refs that haven't been used very often this frame. */
1315  if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1316  int x_off = mb_x << 4, y_off = mb_y << 4;
1317  int mx = (mb->mv.x>>2) + x_off + 8;
1318  int my = (mb->mv.y>>2) + y_off;
1319  uint8_t **src= s->framep[ref]->tf.f->data;
1320  int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1321  /* For threading, a ff_thread_await_progress here might be useful, but
1322  * it actually slows down the decoder. Since a bad prefetch doesn't
1323  * generate bad decoder output, we don't run it here. */
1324  s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1325  off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1326  s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1327  }
1328 }
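/* The gate above only prefetches a reference once it has been used by
 * roughly one macroblock in 32 so far this frame (ref_count vs mb_xy >> 5),
 * which keeps rarely-used references from polluting the cache. */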
1329 
1330 /**
1331  * Apply motion vectors to prediction buffer, chapter 18.
1332  */
1333 static av_always_inline
1334 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1335  VP8Macroblock *mb, int mb_x, int mb_y)
1336 {
1337  int x_off = mb_x << 4, y_off = mb_y << 4;
1338  int width = 16*s->mb_width, height = 16*s->mb_height;
1339  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1340  VP56mv *bmv = mb->bmv;
1341 
1342  switch (mb->partitioning) {
1343  case VP8_SPLITMVMODE_NONE:
1344  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1345  0, 0, 16, 16, width, height, &mb->mv);
1346  break;
1347  case VP8_SPLITMVMODE_4x4: {
1348  int x, y;
1349  VP56mv uvmv;
1350 
1351  /* Y */
1352  for (y = 0; y < 4; y++) {
1353  for (x = 0; x < 4; x++) {
1354  vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1355  ref, &bmv[4*y + x],
1356  4*x + x_off, 4*y + y_off, 4, 4,
1357  width, height, s->linesize,
1358  s->put_pixels_tab[2]);
1359  }
1360  }
1361 
1362  /* U/V */
1363  x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1364  for (y = 0; y < 2; y++) {
1365  for (x = 0; x < 2; x++) {
1366  uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1367  mb->bmv[ 2*y * 4 + 2*x+1].x +
1368  mb->bmv[(2*y+1) * 4 + 2*x ].x +
1369  mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1370  uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1371  mb->bmv[ 2*y * 4 + 2*x+1].y +
1372  mb->bmv[(2*y+1) * 4 + 2*x ].y +
1373  mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1374  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1375  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1376  if (s->profile == 3) {
1377  uvmv.x &= ~7;
1378  uvmv.y &= ~7;
1379  }
1380  vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1381  dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1382  4*x + x_off, 4*y + y_off, 4, 4,
1383  width, height, s->uvlinesize,
1384  s->put_pixels_tab[2]);
1385  }
1386  }
1387  break;
1388  }
1389  case VP8_SPLITMVMODE_16x8:
1390  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391  0, 0, 16, 8, width, height, &bmv[0]);
1392  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393  0, 8, 16, 8, width, height, &bmv[1]);
1394  break;
1395  case VP8_SPLITMVMODE_8x16:
1396  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397  0, 0, 8, 16, width, height, &bmv[0]);
1398  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399  8, 0, 8, 16, width, height, &bmv[1]);
1400  break;
1401  case VP8_SPLITMVMODE_8x8:
1402  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1403  0, 0, 8, 8, width, height, &bmv[0]);
1404  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1405  8, 0, 8, 8, width, height, &bmv[1]);
1406  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1407  0, 8, 8, 8, width, height, &bmv[2]);
1408  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1409  8, 8, 8, 8, width, height, &bmv[3]);
1410  break;
1411  }
1412 }
1413 
1414 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1415  uint8_t *dst[3], VP8Macroblock *mb)
1416 {
1417  int x, y, ch;
1418 
1419  if (mb->mode != MODE_I4x4) {
1420  uint8_t *y_dst = dst[0];
1421  for (y = 0; y < 4; y++) {
1422  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1423  if (nnz4) {
1424  if (nnz4&~0x01010101) {
1425  for (x = 0; x < 4; x++) {
1426  if ((uint8_t)nnz4 == 1)
1427  s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1428  else if((uint8_t)nnz4 > 1)
1429  s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1430  nnz4 >>= 8;
1431  if (!nnz4)
1432  break;
1433  }
1434  } else {
1435  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1436  }
1437  }
1438  y_dst += 4*s->linesize;
1439  }
1440  }
1441 
1442  for (ch = 0; ch < 2; ch++) {
1443  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1444  if (nnz4) {
1445  uint8_t *ch_dst = dst[1+ch];
1446  if (nnz4&~0x01010101) {
1447  for (y = 0; y < 2; y++) {
1448  for (x = 0; x < 2; x++) {
1449  if ((uint8_t)nnz4 == 1)
1450  s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1451  else if((uint8_t)nnz4 > 1)
1452  s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1453  nnz4 >>= 8;
1454  if (!nnz4)
1455  goto chroma_idct_end;
1456  }
1457  ch_dst += 4*s->uvlinesize;
1458  }
1459  } else {
1460  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1461  }
1462  }
1463 chroma_idct_end: ;
1464  }
1465 }
1466 
1467 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
1468 {
1469  int interior_limit, filter_level;
1470 
1471  if (s->segmentation.enabled) {
1472  filter_level = s->segmentation.filter_level[mb->segment];
1473  if (!s->segmentation.absolute_vals)
1474  filter_level += s->filter.level;
1475  } else
1476  filter_level = s->filter.level;
1477 
1478  if (s->lf_delta.enabled) {
1479  filter_level += s->lf_delta.ref[mb->ref_frame];
1480  filter_level += s->lf_delta.mode[mb->mode];
1481  }
1482 
1483  filter_level = av_clip_uintp2(filter_level, 6);
1484 
1485  interior_limit = filter_level;
1486  if (s->filter.sharpness) {
1487  interior_limit >>= (s->filter.sharpness + 3) >> 2;
1488  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1489  }
1490  interior_limit = FFMAX(interior_limit, 1);
1491 
1492  f->filter_level = filter_level;
1493  f->inner_limit = interior_limit;
1494  f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1495 }
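/* Net effect of the above: filter_level is the per-segment (or global)
 * 6-bit strength plus any reference/mode deltas, clamped to [0,63];
 * interior_limit shrinks as sharpness rises but never below 1; and the
 * inner filter is skipped only for skipped macroblocks that are neither
 * I4x4 nor split-MV, since those always have subblock edges to filter. */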
1496 
1497 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1498 {
1499  int mbedge_lim, bedge_lim, hev_thresh;
1500  int filter_level = f->filter_level;
1501  int inner_limit = f->inner_limit;
1502  int inner_filter = f->inner_filter;
1503  int linesize = s->linesize;
1504  int uvlinesize = s->uvlinesize;
1505  static const uint8_t hev_thresh_lut[2][64] = {
1506  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1507  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1508  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1509  3, 3, 3, 3 },
1510  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1511  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1512  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1513  2, 2, 2, 2 }
1514  };
1515 
1516  if (!filter_level)
1517  return;
1518 
1519  bedge_lim = 2*filter_level + inner_limit;
1520  mbedge_lim = bedge_lim + 4;
1521 
1522  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1523 
1524  if (mb_x) {
1525  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1526  mbedge_lim, inner_limit, hev_thresh);
1527  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1528  mbedge_lim, inner_limit, hev_thresh);
1529  }
1530 
1531  if (inner_filter) {
1532  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1533  inner_limit, hev_thresh);
1534  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1535  inner_limit, hev_thresh);
1536  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1537  inner_limit, hev_thresh);
1538  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1539  uvlinesize, bedge_lim,
1540  inner_limit, hev_thresh);
1541  }
1542 
1543  if (mb_y) {
1544  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1545  mbedge_lim, inner_limit, hev_thresh);
1546  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1547  mbedge_lim, inner_limit, hev_thresh);
1548  }
1549 
1550  if (inner_filter) {
1551  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1552  linesize, bedge_lim,
1553  inner_limit, hev_thresh);
1554  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1555  linesize, bedge_lim,
1556  inner_limit, hev_thresh);
1557  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1558  linesize, bedge_lim,
1559  inner_limit, hev_thresh);
1560  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1561  dst[2] + 4 * uvlinesize,
1562  uvlinesize, bedge_lim,
1563  inner_limit, hev_thresh);
1564  }
1565 }
1566 
1567 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1568 {
1569  int mbedge_lim, bedge_lim;
1570  int filter_level = f->filter_level;
1571  int inner_limit = f->inner_limit;
1572  int inner_filter = f->inner_filter;
1573  int linesize = s->linesize;
1574 
1575  if (!filter_level)
1576  return;
1577 
1578  bedge_lim = 2*filter_level + inner_limit;
1579  mbedge_lim = bedge_lim + 4;
1580 
1581  if (mb_x)
1582  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1583  if (inner_filter) {
1584  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1585  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1586  s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1587  }
1588 
1589  if (mb_y)
1590  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1591  if (inner_filter) {
1592  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1593  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1594  s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1595  }
1596 }
1597 
1598 #define MARGIN (16 << 2)
1599 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1600  VP8Frame *prev_frame)
1601 {
1602  VP8Context *s = avctx->priv_data;
1603  int mb_x, mb_y;
1604 
1605  s->mv_min.y = -MARGIN;
1606  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1607  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1608  VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1609  int mb_xy = mb_y*s->mb_width;
1610 
1611  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1612 
1613  s->mv_min.x = -MARGIN;
1614  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1615  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1616  if (mb_y == 0)
1617  AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1618  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1619  prev_frame && prev_frame->seg_map ?
1620  prev_frame->seg_map->data + mb_xy : NULL, 1);
1621  s->mv_min.x -= 64;
1622  s->mv_max.x -= 64;
1623  }
1624  s->mv_min.y -= 64;
1625  s->mv_max.y -= 64;
1626  }
1627 }
1628 
1629 #if HAVE_THREADS
1630 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1631  do {\
1632  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1633  if (otd->thread_mb_pos < tmp) {\
1634  pthread_mutex_lock(&otd->lock);\
1635  td->wait_mb_pos = tmp;\
1636  do {\
1637  if (otd->thread_mb_pos >= tmp)\
1638  break;\
1639  pthread_cond_wait(&otd->cond, &otd->lock);\
1640  } while (1);\
1641  td->wait_mb_pos = INT_MAX;\
1642  pthread_mutex_unlock(&otd->lock);\
1643  }\
1644  } while(0);
1645 
1646 #define update_pos(td, mb_y, mb_x)\
1647  do {\
1648  int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1649  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1650  int is_null = (next_td == NULL) || (prev_td == NULL);\
1651  int pos_check = (is_null) ? 1 :\
1652  (next_td != td && pos >= next_td->wait_mb_pos) ||\
1653  (prev_td != td && pos >= prev_td->wait_mb_pos);\
1654  td->thread_mb_pos = pos;\
1655  if (sliced_threading && pos_check) {\
1656  pthread_mutex_lock(&td->lock);\
1657  pthread_cond_broadcast(&td->cond);\
1658  pthread_mutex_unlock(&td->lock);\
1659  }\
1660  } while(0);
1661 #else
1662 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1663 #define update_pos(td, mb_y, mb_x)
1664 #endif
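/* Protocol sketch for the two macros above: each worker publishes its
 * progress as (mb_y << 16 | mb_x) in thread_mb_pos, so a single integer
 * compare gives raster order. check_thread_pos() sleeps on the peer's
 * condition variable until the peer passes the requested position, and
 * update_pos() broadcasts after each macroblock whenever sliced threading
 * is active and a peer might be waiting. */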
1665 
1666 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1667  int jobnr, int threadnr)
1668 {
1669  VP8Context *s = avctx->priv_data;
1670  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1671  int mb_y = td->thread_mb_pos>>16;
1672  int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1673  int num_jobs = s->num_jobs;
1674  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
1675  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1676  VP8Macroblock *mb;
1677  uint8_t *dst[3] = {
1678  curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1679  curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1680  curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
1681  };
1682  if (mb_y == 0) prev_td = td;
1683  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1684  if (mb_y == s->mb_height-1) next_td = td;
1685  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1686  if (s->mb_layout == 1)
1687  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1688  else {
1689  // Make sure the previous frame has read its segmentation map,
1690  // if we re-use the same map.
1691  if (prev_frame && s->segmentation.enabled &&
1692  !s->segmentation.update_map)
1693  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
1694  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1695  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1696  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1697  }
1698 
1699  memset(td->left_nnz, 0, sizeof(td->left_nnz));
1700  // left edge of 129 for intra prediction
1701  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1702  for (i = 0; i < 3; i++)
1703  for (y = 0; y < 16>>!!i; y++)
1704  dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
1705  if (mb_y == 1) {
1706  s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1707  }
1708  }
1709 
1710  s->mv_min.x = -MARGIN;
1711  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1712 
1713  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1714  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1715  if (prev_td != td) {
1716  if (threadnr != 0) {
1717  check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1718  } else {
1719  check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1720  }
1721  }
1722 
1723  s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1724  s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1725 
1726  if (!s->mb_layout)
1727  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1728  prev_frame && prev_frame->seg_map ?
1729  prev_frame->seg_map->data + mb_xy : NULL, 0);
1730 
1731  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1732 
1733  if (!mb->skip)
1734  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1735 
1736  if (mb->mode <= MODE_I4x4)
1737  intra_predict(s, td, dst, mb, mb_x, mb_y);
1738  else
1739  inter_predict(s, td, dst, mb, mb_x, mb_y);
1740 
1741  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1742 
1743  if (!mb->skip) {
1744  idct_mb(s, td, dst, mb);
1745  } else {
1746  AV_ZERO64(td->left_nnz);
1747  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1748 
1749  // Reset DC block predictors if they would exist if the mb had coefficients
1750  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1751  td->left_nnz[8] = 0;
1752  s->top_nnz[mb_x][8] = 0;
1753  }
1754  }
1755 
1756  if (s->deblock_filter)
1757  filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1758 
1759  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1760  if (s->filter.simple)
1761  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1762  else
1763  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1764  }
1765 
1766  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1767 
1768  dst[0] += 16;
1769  dst[1] += 8;
1770  dst[2] += 8;
1771  s->mv_min.x -= 64;
1772  s->mv_max.x -= 64;
1773 
1774  if (mb_x == s->mb_width+1) {
1775  update_pos(td, mb_y, s->mb_width+3);
1776  } else {
1777  update_pos(td, mb_y, mb_x);
1778  }
1779  }
1780 }
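/* Editor's note (illustrative sketch, not part of vp8.c): the check_thread_pos()/
 * update_pos() pair used above synchronizes slice threads on a single packed
 * position word. Judging from "td->thread_mb_pos = mb_y<<16" in
 * vp8_decode_mb_row_sliced() below, the packing appears to be:
 *
 *     int pos = (mb_y << 16) | mb_x;   // row in the high half, column in the low half
 *     int row = pos >> 16;             // recover mb_y
 *     int col = pos & 0xFFFF;          // recover mb_x
 *
 * which would explain why update_pos(td, mb_y, INT_MAX & 0xFFFF) marks a row as
 * completely finished: no real column index can exceed 0xFFFF.
 */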
1781 
1782 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1783  int jobnr, int threadnr)
1784 {
1785  VP8Context *s = avctx->priv_data;
1786  VP8ThreadData *td = &s->thread_data[threadnr];
1787  int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1788  AVFrame *curframe = s->curframe->tf.f;
1789  VP8Macroblock *mb;
1790  VP8ThreadData *prev_td, *next_td;
1791  uint8_t *dst[3] = {
1792  curframe->data[0] + 16*mb_y*s->linesize,
1793  curframe->data[1] + 8*mb_y*s->uvlinesize,
1794  curframe->data[2] + 8*mb_y*s->uvlinesize
1795  };
1796 
1797  if (s->mb_layout == 1)
1798  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1799  else
1800  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1801 
1802  if (mb_y == 0) prev_td = td;
1803  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1804  if (mb_y == s->mb_height-1) next_td = td;
1805  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1806 
1807  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1808  VP8FilterStrength *f = &td->filter_strength[mb_x];
1809  if (prev_td != td) {
1810  check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1811  }
1812  if (next_td != td)
1813  if (next_td != &s->thread_data[0]) {
1814  check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1815  }
1816 
1817  if (num_jobs == 1) {
1818  if (s->filter.simple)
1819  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1820  else
1821  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1822  }
1823 
1824  if (s->filter.simple)
1825  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1826  else
1827  filter_mb(s, dst, f, mb_x, mb_y);
1828  dst[0] += 16;
1829  dst[1] += 8;
1830  dst[2] += 8;
1831 
1832  update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1833  }
1834 }
1835 
1836 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1837  int jobnr, int threadnr)
1838 {
1839  VP8Context *s = avctx->priv_data;
1840  VP8ThreadData *td = &s->thread_data[jobnr];
1841  VP8ThreadData *next_td = NULL, *prev_td = NULL;
1842  VP8Frame *curframe = s->curframe;
1843  int mb_y, num_jobs = s->num_jobs;
1844  td->thread_nr = threadnr;
1845  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1846  if (mb_y >= s->mb_height) break;
1847  td->thread_mb_pos = mb_y<<16;
1848  vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1849  if (s->deblock_filter)
1850  vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1851  update_pos(td, mb_y, INT_MAX & 0xFFFF);
1852 
1853  s->mv_min.y -= 64;
1854  s->mv_max.y -= 64;
1855 
1856  if (avctx->active_thread_type == FF_THREAD_FRAME)
1857  ff_thread_report_progress(&curframe->tf, mb_y, 0);
1858  }
1859 
1860  return 0;
1861 }
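/* Editor's note (illustrative): the loop above interleaves rows across jobs, so
 * job j decodes mb_y = j, j + num_jobs, j + 2*num_jobs, ... For num_jobs == 2:
 *
 *     job 0: rows 0, 2, 4, ...
 *     job 1: rows 1, 3, 5, ...
 *
 * Vertically adjacent rows therefore always belong to different threads, which
 * is why vp8_decode_mb_row_no_filter() and vp8_filter_mb_row() pick
 * prev_td/next_td with (jobnr +/- 1) % num_jobs.
 */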
1862 
1863 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1864  AVPacket *avpkt)
1865 {
1866  VP8Context *s = avctx->priv_data;
1867  int ret, i, referenced, num_jobs;
1868  enum AVDiscard skip_thresh;
1869  VP8Frame *av_uninit(curframe), *prev_frame;
1870 
1871  if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1872  goto err;
1873 
1874  prev_frame = s->framep[VP56_FRAME_CURRENT];
1875 
1876  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1877  || s->update_altref == VP56_FRAME_CURRENT;
1878 
1879  skip_thresh = !referenced ? AVDISCARD_NONREF :
1880  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1881 
1882  if (avctx->skip_frame >= skip_thresh) {
1883  s->invisible = 1;
1884  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1885  goto skip_decode;
1886  }
1887  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1888 
1889  // release no longer referenced frames
1890  for (i = 0; i < 5; i++)
1891  if (s->frames[i].tf.f->data[0] &&
1892  &s->frames[i] != prev_frame &&
1893  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1894  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1895  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1896  vp8_release_frame(s, &s->frames[i]);
1897 
1898  // find a free buffer
1899  for (i = 0; i < 5; i++)
1900  if (&s->frames[i] != prev_frame &&
1901  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1902  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1903  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1904  curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1905  break;
1906  }
1907  if (i == 5) {
1908  av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1909  abort();
1910  }
1911  if (curframe->tf.f->data[0])
1912  vp8_release_frame(s, curframe);
1913 
1914  // Given that arithmetic probabilities are updated every frame, it's quite likely
1915  // that the values we have on a random interframe are complete junk if we didn't
1916  // start decoding on a keyframe. So just don't display anything rather than junk.
1917  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1918  !s->framep[VP56_FRAME_GOLDEN] ||
1919  !s->framep[VP56_FRAME_GOLDEN2])) {
1920  av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1921  ret = AVERROR_INVALIDDATA;
1922  goto err;
1923  }
1924 
1925  curframe->tf.f->key_frame = s->keyframe;
1926  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1927  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1928  goto err;
1929 
1930  // check if golden and altref are swapped
1931  if (s->update_altref != VP56_FRAME_NONE) {
1932  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1933  } else {
1934  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1935  }
1936  if (s->update_golden != VP56_FRAME_NONE) {
1937  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1938  } else {
1939  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1940  }
1941  if (s->update_last) {
1942  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1943  } else {
1944  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1945  }
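/* Editor's note (illustrative): update_altref/update_golden name the slot the
 * new reference is taken from, so e.g. update_altref == VP56_FRAME_GOLDEN makes
 * the next altref the current golden frame (the "swap" the comment above refers
 * to), while VP56_FRAME_NONE leaves a slot unchanged and update_last promotes
 * the frame being decoded to the new previous frame. */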
1946  s->next_framep[VP56_FRAME_CURRENT] = curframe;
1947 
1948  ff_thread_finish_setup(avctx);
1949 
1950  s->linesize = curframe->tf.f->linesize[0];
1951  s->uvlinesize = curframe->tf.f->linesize[1];
1952 
1953  memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1954  /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1955  if (!s->mb_layout)
1956  memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1957  if (!s->mb_layout && s->keyframe)
1958  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1959 
1960  // top edge of 127 for intra prediction
1961  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1962  s->top_border[0][15] = s->top_border[0][23] = 127;
1963  s->top_border[0][31] = 127;
1964  memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1965  }
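/* Editor's note: per the VP8 specification (RFC 6386), samples above the frame
 * behave as 127 and samples to the left as 129 for intra prediction, so the
 * border buffers are seeded with those constants before any real pixels have
 * been decoded. */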
1966  memset(s->ref_count, 0, sizeof(s->ref_count));
1967 
1968 
1969  if (s->mb_layout == 1) {
1970  // Make sure the previous frame has read its segmentation map,
1971  // if we re-use the same map.
1972  if (prev_frame && s->segmentation.enabled &&
1973  !s->segmentation.update_map)
1974  ff_thread_await_progress(&prev_frame->tf, 1, 0);
1975  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1976  }
1977 
1978  if (avctx->active_thread_type == FF_THREAD_FRAME)
1979  num_jobs = 1;
1980  else
1981  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1982  s->num_jobs = num_jobs;
1983  s->curframe = curframe;
1984  s->prev_frame = prev_frame;
1985  s->mv_min.y = -MARGIN;
1986  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1987  for (i = 0; i < MAX_THREADS; i++) {
1988  s->thread_data[i].thread_mb_pos = 0;
1989  s->thread_data[i].wait_mb_pos = INT_MAX;
1990  }
1991  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1992 
1993  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1994  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1995 
1996 skip_decode:
1997  // if future frames don't use the updated probabilities,
1998  // reset them to the values we saved
1999  if (!s->update_probabilities)
2000  s->prob[0] = s->prob[1];
2001 
2002  if (!s->invisible) {
2003  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2004  return ret;
2005  *got_frame = 1;
2006  }
2007 
2008  return avpkt->size;
2009 err:
2010  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2011  return ret;
2012 }
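/* Editor's note: a minimal caller sketch for this entry point, assuming the
 * avcodec API of this era (error handling omitted; "pkt" is assumed to hold one
 * complete VP8 frame):
 *
 *     AVCodec *codec      = avcodec_find_decoder(AV_CODEC_ID_VP8);
 *     AVCodecContext *ctx = avcodec_alloc_context3(codec);
 *     AVFrame *frame      = av_frame_alloc();
 *     int got_frame       = 0;
 *     avcodec_open2(ctx, codec, NULL);
 *     avcodec_decode_video2(ctx, frame, &got_frame, &pkt);
 *     if (got_frame)
 *         process(frame->data, frame->linesize);   // hypothetical consumer
 */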
2013 
2014 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2015 {
2016  VP8Context *s = avctx->priv_data;
2017  int i;
2018 
2019  vp8_decode_flush_impl(avctx, 1);
2020  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2021  av_frame_free(&s->frames[i].tf.f);
2022 
2023  return 0;
2024 }
2025 
2026 static int vp8_init_frames(VP8Context *s)
2027 {
2028  int i;
2029  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2030  s->frames[i].tf.f = av_frame_alloc();
2031  if (!s->frames[i].tf.f)
2032  return AVERROR(ENOMEM);
2033  }
2034  return 0;
2035 }
2036 
2037 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2038 {
2039  VP8Context *s = avctx->priv_data;
2040  int ret;
2041 
2042  s->avctx = avctx;
2043  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2044  avctx->internal->allocate_progress = 1;
2045 
2046  ff_videodsp_init(&s->vdsp, 8);
2047  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2048  ff_vp8dsp_init(&s->vp8dsp);
2049 
2050  if ((ret = vp8_init_frames(s)) < 0) {
2051  ff_vp8_decode_free(avctx);
2052  return ret;
2053  }
2054 
2055  return 0;
2056 }
2057 
2058 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2059 {
2060  VP8Context *s = avctx->priv_data;
2061  int ret;
2062 
2063  s->avctx = avctx;
2064 
2065  if ((ret = vp8_init_frames(s)) < 0) {
2066  ff_vp8_decode_free(avctx);
2067  return ret;
2068  }
2069 
2070  return 0;
2071 }
2072 
2073 #define REBASE(pic) \
2074  pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
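/* Editor's note (illustrative): REBASE() maps a pointer into s_src->frames[] to
 * the element at the same index in s->frames[] via plain pointer arithmetic. It
 * is equivalent to:
 *
 *     ptrdiff_t idx = pic - &s_src->frames[0];       // index in the source array
 *     VP8Frame *out = pic ? &s->frames[idx] : NULL;  // same index, destination array
 */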
2075 
2076 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2077 {
2078  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2079  int i;
2080 
2081  if (s->macroblocks_base &&
2082  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2083  free_buffers(s);
2084  s->mb_width = s_src->mb_width;
2085  s->mb_height = s_src->mb_height;
2086  }
2087 
2088  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2089  s->segmentation = s_src->segmentation;
2090  s->lf_delta = s_src->lf_delta;
2091  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2092 
2093  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2094  if (s_src->frames[i].tf.f->data[0]) {
2095  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2096  if (ret < 0)
2097  return ret;
2098  }
2099  }
2100 
2101  s->framep[0] = REBASE(s_src->next_framep[0]);
2102  s->framep[1] = REBASE(s_src->next_framep[1]);
2103  s->framep[2] = REBASE(s_src->next_framep[2]);
2104  s->framep[3] = REBASE(s_src->next_framep[3]);
2105 
2106  return 0;
2107 }
2108 
2109 static unsigned apply_padding(unsigned size) { return size + (size & 1); }
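/* Editor's note: RIFF chunk payloads are padded to an even length, so a chunk
 * declaring e.g. 13 payload bytes occupies 14 in the file; apply_padding()
 * rounds the declared size up accordingly (apply_padding(13) == 14,
 * apply_padding(12) == 12). */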
2110 
2111 static int webp_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
2112  AVPacket *avpkt)
2113 {
2114  const uint8_t *buf = avpkt->data;
2115  int buf_size = avpkt->size;
2116  AVPacket pkt = *avpkt;
2117 
2118  if (buf_size >= 16
2119  && AV_RL32(buf ) == AV_RL32("RIFF")
2120  && AV_RL32(buf+ 8) == AV_RL32("WEBP")) {
2121  unsigned riff_size = apply_padding(AV_RL32(buf+4)) + 8;
2122  buf += 12; // Skip over main header
2123  buf_size -= 12;
2124  if (buf_size < 8 || riff_size < 8) {
2125  av_log(avctx, AV_LOG_ERROR, "Incomplete header.\n");
2126  return AVERROR_INVALIDDATA;
2127  }
2128  if (AV_RL32(buf) == AV_RL32("VP8L")) {
2129  av_log(avctx, AV_LOG_ERROR, "Unsupported WebP lossless format.\n");
2130  return AVERROR_PATCHWELCOME;
2131  }
2132  if (AV_RL32(buf) == AV_RL32("VP8X") && AV_RL32(buf+4) < (unsigned)buf_size) {
2133  unsigned size = apply_padding(AV_RL32(buf+4) + 8);
2134  buf += size;
2135  buf_size -= size;
2136  }
2137  if (buf_size >= 8
2138  && AV_RL32(buf) == AV_RL32("ALPH") && AV_RL32(buf+4) < (unsigned)buf_size) {
2139  unsigned size = apply_padding(AV_RL32(buf+4) + 8);
2140  buf += size;
2141  buf_size -= size;
2142  av_log(avctx, AV_LOG_WARNING, "Skipping alpha plane\n");
2143  }
2144  if (buf_size >= 8 && AV_RL32(buf) == AV_RL32("VP8 ")) {
2145  buf += 8;
2146  buf_size -= 8;
2147  }
2148  }
2149  pkt.data = buf;
2150  pkt.size = buf_size;
2151 
2152  return ff_vp8_decode_frame(avctx, data, data_size, &pkt);
2153 }
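/* Editor's note (illustrative): the container layout unwrapped above is
 *
 *     "RIFF" <u32 riff_size> "WEBP"
 *         [ "VP8X" <u32 size> <extended features> ]
 *         [ "ALPH" <u32 size> <alpha plane>       ]   (skipped with a warning)
 *         "VP8 " <u32 size> <VP8 bitstream>
 *
 * where each <u32 size> is little-endian and counts the payload only; once the
 * headers are stripped, the remaining bytes are fed to ff_vp8_decode_frame() as
 * an ordinary VP8 packet.
 */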
2154 
2155 AVCodec ff_vp8_decoder = {
2156  .name = "vp8",
2157  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2158  .type = AVMEDIA_TYPE_VIDEO,
2159  .id = AV_CODEC_ID_VP8,
2160  .priv_data_size = sizeof(VP8Context),
2161  .init = ff_vp8_decode_init,
2162  .close = ff_vp8_decode_free,
2163  .decode = ff_vp8_decode_frame,
2164  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2165  .flush = vp8_decode_flush,
2166  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2167  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2168 };
2169 
2170 // AVCodec ff_webp_decoder = {
2171 // .name = "webp",
2172 // .long_name = NULL_IF_CONFIG_SMALL("WebP"),
2173 // .type = AVMEDIA_TYPE_VIDEO,
2174 // .id = AV_CODEC_ID_WEBP,
2175 // .priv_data_size = sizeof(VP8Context),
2176 // .init = ff_vp8_decode_init,
2177 // .close = ff_vp8_decode_free,
2178 // .decode = webp_decode_frame,
2179 // .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2180 // .flush = vp8_decode_flush,
2181 // .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2182 // .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2183 // };