vp8.c
1 /*
2  * VP7/VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Fiona Glaser
7  * Copyright (C) 2012 Daniel Kang
8  * Copyright (C) 2014 Peter Ross
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include "libavutil/imgutils.h"
28 
29 #include "avcodec.h"
30 #include "internal.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 #include "vp8.h"
34 #include "vp8data.h"
35 
36 #if ARCH_ARM
37 # include "arm/vp8.h"
38 #endif
39 
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
46 #endif
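/* Illustration (not from the original source): with both decoders configured,
 * VPX(1, decode_frame_header) expands to vp7_decode_frame_header and
 * VPX(0, decode_frame_header) to vp8_decode_frame_header; with only one
 * decoder built in, the vp7 argument is ignored and the single available
 * function is substituted unconditionally. */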
47 
48 static void free_buffers(VP8Context *s)
49 {
50  int i;
51  if (s->thread_data)
52  for (i = 0; i < MAX_THREADS; i++) {
53 #if HAVE_THREADS
54  pthread_cond_destroy(&s->thread_data[i].cond);
55  pthread_mutex_destroy(&s->thread_data[i].lock);
56 #endif
57  av_freep(&s->thread_data[i].filter_strength);
58  }
59  av_freep(&s->thread_data);
60  av_freep(&s->macroblocks_base);
61  av_freep(&s->intra4x4_pred_mode_top);
62  av_freep(&s->top_nnz);
63  av_freep(&s->top_border);
64 
65  s->macroblocks = NULL;
66 }
67 
68 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
69 {
70  int ret;
71  if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
72  ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
73  return ret;
74  if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
75  ff_thread_release_buffer(s->avctx, &f->tf);
76  return AVERROR(ENOMEM);
77  }
78  return 0;
79 }
80 
81 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
82 {
83  av_buffer_unref(&f->seg_map);
84  ff_thread_release_buffer(s->avctx, &f->tf);
85 }
86 
87 #if CONFIG_VP8_DECODER
88 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
89 {
90  int ret;
91 
92  vp8_release_frame(s, dst);
93 
94  if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
95  return ret;
96  if (src->seg_map &&
97  !(dst->seg_map = av_buffer_ref(src->seg_map))) {
98  vp8_release_frame(s, dst);
99  return AVERROR(ENOMEM);
100  }
101 
102  return 0;
103 }
104 #endif /* CONFIG_VP8_DECODER */
105 
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
107 {
108  VP8Context *s = avctx->priv_data;
109  int i;
110 
111  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112  vp8_release_frame(s, &s->frames[i]);
113  memset(s->framep, 0, sizeof(s->framep));
114 
115  if (free_mem)
116  free_buffers(s);
117 }
118 
119 static void vp8_decode_flush(AVCodecContext *avctx)
120 {
121  vp8_decode_flush_impl(avctx, 0);
122 }
123 
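/* Note (not from the original source): the pool scanned below holds 5 frames,
 * while at most 4 can be pinned at once as the CURRENT, PREVIOUS, GOLDEN and
 * GOLDEN2 (altref) references, so a free slot should always exist; failing to
 * find one indicates corrupted reference bookkeeping, hence the abort(). */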
124 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
125 {
126  VP8Frame *frame = NULL;
127  int i;
128 
129  // find a free buffer
130  for (i = 0; i < 5; i++)
131  if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
132  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
134  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135  frame = &s->frames[i];
136  break;
137  }
138  if (i == 5) {
139  av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
140  abort();
141  }
142  if (frame->tf.f->data[0])
143  vp8_release_frame(s, frame);
144 
145  return frame;
146 }
147 
148 static av_always_inline
149 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
150 {
151  AVCodecContext *avctx = s->avctx;
152  int i, ret;
153 
154  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
155  height != s->avctx->height) {
156  vp8_decode_flush_impl(s->avctx, 1);
157 
158  ret = ff_set_dimensions(s->avctx, width, height);
159  if (ret < 0)
160  return ret;
161  }
162 
163  s->mb_width = (s->avctx->coded_width + 15) / 16;
164  s->mb_height = (s->avctx->coded_height + 15) / 16;
165 
166  s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
167  FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
168  if (!s->mb_layout) { // Frame threading and one thread
169  s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
170  sizeof(*s->macroblocks));
171  s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
172  } else // Sliced threading
173  s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
174  sizeof(*s->macroblocks));
175  s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
176  s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
177  s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
178 
179  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
180  !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
181  free_buffers(s);
182  return AVERROR(ENOMEM);
183  }
184 
185  for (i = 0; i < MAX_THREADS; i++) {
186  s->thread_data[i].filter_strength =
187  av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
188  if (!s->thread_data[i].filter_strength) {
189  free_buffers(s);
190  return AVERROR(ENOMEM);
191  }
192 #if HAVE_THREADS
193  pthread_mutex_init(&s->thread_data[i].lock, NULL);
194  pthread_cond_init(&s->thread_data[i].cond, NULL);
195 #endif
196  }
197 
198  s->macroblocks = s->macroblocks_base + 1;
199 
200  return 0;
201 }
202 
203 static int vp7_update_dimensions(VP8Context *s, int width, int height)
204 {
205  return update_dimensions(s, width, height, IS_VP7);
206 }
207 
208 static int vp8_update_dimensions(VP8Context *s, int width, int height)
209 {
210  return update_dimensions(s, width, height, IS_VP8);
211 }
212 
213 
214 static void parse_segment_info(VP8Context *s)
215 {
216  VP56RangeCoder *c = &s->c;
217  int i;
218 
219  s->segmentation.update_map = vp8_rac_get(c);
220 
221  if (vp8_rac_get(c)) { // update segment feature data
222  s->segmentation.absolute_vals = vp8_rac_get(c);
223 
224  for (i = 0; i < 4; i++)
225  s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
226 
227  for (i = 0; i < 4; i++)
228  s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
229  }
230  if (s->segmentation.update_map)
231  for (i = 0; i < 3; i++)
232  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
233 }
234 
235 static void update_lf_deltas(VP8Context *s)
236 {
237  VP56RangeCoder *c = &s->c;
238  int i;
239 
240  for (i = 0; i < 4; i++) {
241  if (vp8_rac_get(c)) {
242  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
243 
244  if (vp8_rac_get(c))
245  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
246  }
247  }
248 
249  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
250  if (vp8_rac_get(c)) {
251  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
252 
253  if (vp8_rac_get(c))
254  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
255  }
256  }
257 }
258 
259 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
260 {
261  const uint8_t *sizes = buf;
262  int i;
263 
264  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
265 
266  buf += 3 * (s->num_coeff_partitions - 1);
267  buf_size -= 3 * (s->num_coeff_partitions - 1);
268  if (buf_size < 0)
269  return -1;
270 
271  for (i = 0; i < s->num_coeff_partitions - 1; i++) {
272  int size = AV_RL24(sizes + 3 * i);
273  if (buf_size - size < 0)
274  return -1;
275 
276  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
277  buf += size;
278  buf_size -= size;
279  }
280  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
281 
282  return 0;
283 }
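/* Layout sketch (not from the original source): for num_coeff_partitions == 2
 * the buffer handed to setup_partitions() looks like
 *
 *   [3-byte LE size of partition 0][partition 0 data][partition 1 data]
 *
 * Only the first num_coeff_partitions - 1 sizes are coded explicitly; the
 * last partition simply consumes the remaining buf_size, as the final
 * ff_vp56_init_range_decoder() call above shows. */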
284 
285 static void vp7_get_quants(VP8Context *s)
286 {
287  VP56RangeCoder *c = &s->c;
288 
289  int yac_qi = vp8_rac_get_uint(c, 7);
290  int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
291  int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
292  int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293  int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294  int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
295 
296  s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
297  s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
298  s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
299  s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
300  s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
301  s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
302 }
303 
304 static void vp8_get_quants(VP8Context *s)
305 {
306  VP56RangeCoder *c = &s->c;
307  int i, base_qi;
308 
309  int yac_qi = vp8_rac_get_uint(c, 7);
310  int ydc_delta = vp8_rac_get_sint(c, 4);
311  int y2dc_delta = vp8_rac_get_sint(c, 4);
312  int y2ac_delta = vp8_rac_get_sint(c, 4);
313  int uvdc_delta = vp8_rac_get_sint(c, 4);
314  int uvac_delta = vp8_rac_get_sint(c, 4);
315 
316  for (i = 0; i < 4; i++) {
317  if (s->segmentation.enabled) {
318  base_qi = s->segmentation.base_quant[i];
319  if (!s->segmentation.absolute_vals)
320  base_qi += yac_qi;
321  } else
322  base_qi = yac_qi;
323 
324  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
325  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
326  s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
327  /* 101581>>16 is equivalent to 155/100 */
328  s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
329  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
330  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
331 
332  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
333  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
334  }
335 }
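/* Worked example (not from the original source): the fixed-point multiply
 * x * 101581 >> 16 above approximates x * 155 / 100; for x = 100 it gives
 * 100 * 101581 = 10158100, and 10158100 >> 16 = 155. The following FFMAX()
 * then enforces the minimum of 8 for the second-order luma DC quantizer. */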
336 
337 /**
338  * Determine which buffers golden and altref should be updated with after this frame.
339  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
340  *
341  * Intra frames update all 3 references
342  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
343  * If the update (golden|altref) flag is set, it's updated with the current frame
344  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
345  * If the flag is not set, the number read means:
346  * 0: no update
347  * 1: VP56_FRAME_PREVIOUS
348  * 2: update golden with altref, or update altref with golden
349  */
350 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
351 {
352  VP56RangeCoder *c = &s->c;
353 
354  if (update)
355  return VP56_FRAME_CURRENT;
356 
357  switch (vp8_rac_get_uint(c, 2)) {
358  case 1:
359  return VP56_FRAME_PREVIOUS;
360  case 2:
361  return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
362  }
363  return VP56_FRAME_NONE;
364 }
365 
366 static void vp78_reset_probability_tables(VP8Context *s)
367 {
368  int i, j;
369  for (i = 0; i < 4; i++)
370  for (j = 0; j < 16; j++)
371  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
372  sizeof(s->prob->token[i][j]));
373 }
374 
375 static void vp78_update_probability_tables(VP8Context *s)
376 {
377  VP56RangeCoder *c = &s->c;
378  int i, j, k, l, m;
379 
380  for (i = 0; i < 4; i++)
381  for (j = 0; j < 8; j++)
382  for (k = 0; k < 3; k++)
383  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
384  if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
385  int prob = vp8_rac_get_uint(c, 8);
386  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
387  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
388  }
389 }
390 
391 #define VP7_MVC_SIZE 17
392 #define VP8_MVC_SIZE 19
393 
394 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
395  int mvc_size)
396 {
397  VP56RangeCoder *c = &s->c;
398  int i, j;
399 
400  if (vp8_rac_get(c))
401  for (i = 0; i < 4; i++)
402  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
403  if (vp8_rac_get(c))
404  for (i = 0; i < 3; i++)
405  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
406 
407  // 17.2 MV probability update
408  for (i = 0; i < 2; i++)
409  for (j = 0; j < mvc_size; j++)
410  if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
411  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
412 }
413 
414 static void update_refs(VP8Context *s)
415 {
416  VP56RangeCoder *c = &s->c;
417 
418  int update_golden = vp8_rac_get(c);
419  int update_altref = vp8_rac_get(c);
420 
421  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
422  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
423 }
424 
425 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
426 {
427  int i, j;
428 
429  for (j = 1; j < 3; j++) {
430  for (i = 0; i < height / 2; i++)
431  memcpy(dst->data[j] + i * dst->linesize[j],
432  src->data[j] + i * src->linesize[j], width / 2);
433  }
434 }
435 
436 static void fade(uint8_t *dst, int dst_linesize,
437  const uint8_t *src, int src_linesize,
438  int width, int height,
439  int alpha, int beta)
440 {
441  int i, j;
442  for (j = 0; j < height; j++) {
443  for (i = 0; i < width; i++) {
444  uint8_t y = src[j * src_linesize + i];
445  dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
446  }
447  }
448 }
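/* Worked example (not from the original source): fade() computes
 * clip_uint8(y + y * beta / 256 + alpha) per luma sample; with alpha = 16,
 * beta = -64 and y = 100 this yields 100 + ((100 * -64) >> 8) + 16 = 91. */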
449 
450 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
451 {
452  int alpha = (int8_t) vp8_rac_get_uint(c, 8);
453  int beta = (int8_t) vp8_rac_get_uint(c, 8);
454  int ret;
455 
456  if (!s->keyframe && (alpha || beta)) {
457  int width = s->mb_width * 16;
458  int height = s->mb_height * 16;
459  AVFrame *src, *dst;
460 
461  if (!s->framep[VP56_FRAME_PREVIOUS] ||
462  !s->framep[VP56_FRAME_GOLDEN]) {
463  av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
464  return AVERROR_INVALIDDATA;
465  }
466 
467  dst =
468  src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
469 
470  /* preserve the golden frame, write a new previous frame */
471  if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
472  s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
473  if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
474  return ret;
475 
476  dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
477 
478  copy_chroma(dst, src, width, height);
479  }
480 
481  fade(dst->data[0], dst->linesize[0],
482  src->data[0], src->linesize[0],
483  width, height, alpha, beta);
484  }
485 
486  return 0;
487 }
488 
489 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
490 {
491  VP56RangeCoder *c = &s->c;
492  int part1_size, hscale, vscale, i, j, ret;
493  int width = s->avctx->width;
494  int height = s->avctx->height;
495 
496  s->profile = (buf[0] >> 1) & 7;
497  if (s->profile > 1) {
498  avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
499  return AVERROR_INVALIDDATA;
500  }
501 
502  s->keyframe = !(buf[0] & 1);
503  s->invisible = 0;
504  part1_size = AV_RL24(buf) >> 4;
505 
506  if (buf_size < 4 - s->profile + part1_size) {
507  av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
508  return AVERROR_INVALIDDATA;
509  }
510 
511  buf += 4 - s->profile;
512  buf_size -= 4 - s->profile;
513 
514  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
515 
516  ff_vp56_init_range_decoder(c, buf, part1_size);
517  buf += part1_size;
518  buf_size -= part1_size;
519 
520  /* A. Dimension information (keyframes only) */
521  if (s->keyframe) {
522  width = vp8_rac_get_uint(c, 12);
523  height = vp8_rac_get_uint(c, 12);
524  hscale = vp8_rac_get_uint(c, 2);
525  vscale = vp8_rac_get_uint(c, 2);
526  if (hscale || vscale)
527  avpriv_request_sample(s->avctx, "Upscaling");
528 
529  s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
530  vp78_reset_probability_tables(s);
531  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
532  sizeof(s->prob->pred16x16));
533  memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
534  sizeof(s->prob->pred8x8c));
535  for (i = 0; i < 2; i++)
536  memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
537  sizeof(vp7_mv_default_prob[i]));
538  memset(&s->segmentation, 0, sizeof(s->segmentation));
539  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
540  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
541  }
542 
543  if (s->keyframe || s->profile > 0)
544  memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));
545 
546  /* B. Decoding information for all four macroblock-level features */
547  for (i = 0; i < 4; i++) {
548  s->feature_enabled[i] = vp8_rac_get(c);
549  if (s->feature_enabled[i]) {
550  s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
551 
552  for (j = 0; j < 3; j++)
553  s->feature_index_prob[i][j] =
554  vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
555 
556  if (vp7_feature_value_size[s->profile][i])
557  for (j = 0; j < 4; j++)
558  s->feature_value[i][j] =
559  vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
560  }
561  }
562 
563  s->segmentation.enabled = 0;
564  s->segmentation.update_map = 0;
565  s->lf_delta.enabled = 0;
566 
567  s->num_coeff_partitions = 1;
568  ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
569 
570  if (!s->macroblocks_base || /* first frame */
571  width != s->avctx->width || height != s->avctx->height ||
572  (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
573  if ((ret = vp7_update_dimensions(s, width, height)) < 0)
574  return ret;
575  }
576 
577  /* C. Dequantization indices */
578  vp7_get_quants(s);
579 
580  /* D. Golden frame update flag (a Flag) for interframes only */
581  if (!s->keyframe) {
582  s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
583  s->sign_bias[VP56_FRAME_GOLDEN] = 0;
584  }
585 
586  s->update_last = 1;
587  s->update_probabilities = 1;
588  s->fade_present = 1;
589 
590  if (s->profile > 0) {
591  s->update_probabilities = vp8_rac_get(c);
592  if (!s->update_probabilities)
593  s->prob[1] = s->prob[0];
594 
595  if (!s->keyframe)
596  s->fade_present = vp8_rac_get(c);
597  }
598 
599  /* E. Fading information for previous frame */
600  if (s->fade_present && vp8_rac_get(c)) {
601  if ((ret = vp7_fade_frame(s, c)) < 0)
602  return ret;
603  }
604 
605  /* F. Loop filter type */
606  if (!s->profile)
607  s->filter.simple = vp8_rac_get(c);
608 
609  /* G. DCT coefficient ordering specification */
610  if (vp8_rac_get(c))
611  for (i = 1; i < 16; i++)
612  s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
613 
614  /* H. Loop filter levels */
615  if (s->profile > 0)
616  s->filter.simple = vp8_rac_get(c);
617  s->filter.level = vp8_rac_get_uint(c, 6);
618  s->filter.sharpness = vp8_rac_get_uint(c, 3);
619 
620  /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
621  vp78_update_probability_tables(s);
622 
623  s->mbskip_enabled = 0;
624 
625  /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
626  if (!s->keyframe) {
627  s->prob->intra = vp8_rac_get_uint(c, 8);
628  s->prob->last = vp8_rac_get_uint(c, 8);
629  vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
630  }
631 
632  return 0;
633 }
634 
635 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
636 {
637  VP56RangeCoder *c = &s->c;
638  int header_size, hscale, vscale, ret;
639  int width = s->avctx->width;
640  int height = s->avctx->height;
641 
642  s->keyframe = !(buf[0] & 1);
643  s->profile = (buf[0]>>1) & 7;
644  s->invisible = !(buf[0] & 0x10);
645  header_size = AV_RL24(buf) >> 5;
646  buf += 3;
647  buf_size -= 3;
648 
649  if (s->profile > 3)
650  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
651 
652  if (!s->profile)
653  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
654  sizeof(s->put_pixels_tab));
655  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
656  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
657  sizeof(s->put_pixels_tab));
658 
659  if (header_size > buf_size - 7 * s->keyframe) {
660  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
661  return AVERROR_INVALIDDATA;
662  }
663 
664  if (s->keyframe) {
665  if (AV_RL24(buf) != 0x2a019d) {
666  av_log(s->avctx, AV_LOG_ERROR,
667  "Invalid start code 0x%x\n", AV_RL24(buf));
668  return AVERROR_INVALIDDATA;
669  }
670  width = AV_RL16(buf + 3) & 0x3fff;
671  height = AV_RL16(buf + 5) & 0x3fff;
672  hscale = buf[4] >> 6;
673  vscale = buf[6] >> 6;
674  buf += 7;
675  buf_size -= 7;
676 
677  if (hscale || vscale)
678  avpriv_request_sample(s->avctx, "Upscaling");
679 
680  s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
681  vp78_reset_probability_tables(s);
682  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
683  sizeof(s->prob->pred16x16));
684  memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
685  sizeof(s->prob->pred8x8c));
686  memcpy(s->prob->mvc, vp8_mv_default_prob,
687  sizeof(s->prob->mvc));
688  memset(&s->segmentation, 0, sizeof(s->segmentation));
689  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
690  }
691 
692  ff_vp56_init_range_decoder(c, buf, header_size);
693  buf += header_size;
694  buf_size -= header_size;
695 
696  if (s->keyframe) {
697  s->colorspace = vp8_rac_get(c);
698  if (s->colorspace)
699  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
700  s->fullrange = vp8_rac_get(c);
701  }
702 
703  if ((s->segmentation.enabled = vp8_rac_get(c)))
704  parse_segment_info(s);
705  else
706  s->segmentation.update_map = 0; // FIXME: move this to some init function?
707 
708  s->filter.simple = vp8_rac_get(c);
709  s->filter.level = vp8_rac_get_uint(c, 6);
710  s->filter.sharpness = vp8_rac_get_uint(c, 3);
711 
712  if ((s->lf_delta.enabled = vp8_rac_get(c)))
713  if (vp8_rac_get(c))
714  update_lf_deltas(s);
715 
716  if (setup_partitions(s, buf, buf_size)) {
717  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
718  return AVERROR_INVALIDDATA;
719  }
720 
721  if (!s->macroblocks_base || /* first frame */
722  width != s->avctx->width || height != s->avctx->height ||
723  (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
724  if ((ret = vp8_update_dimensions(s, width, height)) < 0)
725  return ret;
726 
727  vp8_get_quants(s);
728 
729  if (!s->keyframe) {
730  update_refs(s);
731  s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
732  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
733  }
734 
735  // if we aren't saving this frame's probabilities for future frames,
736  // make a copy of the current probabilities
737  if (!(s->update_probabilities = vp8_rac_get(c)))
738  s->prob[1] = s->prob[0];
739 
740  s->update_last = s->keyframe || vp8_rac_get(c);
741 
742  vp78_update_probability_tables(s);
743 
744  if ((s->mbskip_enabled = vp8_rac_get(c)))
745  s->prob->mbskip = vp8_rac_get_uint(c, 8);
746 
747  if (!s->keyframe) {
748  s->prob->intra = vp8_rac_get_uint(c, 8);
749  s->prob->last = vp8_rac_get_uint(c, 8);
750  s->prob->golden = vp8_rac_get_uint(c, 8);
751  vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
752  }
753 
754  return 0;
755 }
756 
757 static av_always_inline
758 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
759 {
760  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
761  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
762 }
763 
764 /**
765  * Motion vector coding, 17.1.
766  */
767 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
768 {
769  int bit, x = 0;
770 
771  if (vp56_rac_get_prob_branchy(c, p[0])) {
772  int i;
773 
774  for (i = 0; i < 3; i++)
775  x += vp56_rac_get_prob(c, p[9 + i]) << i;
776  for (i = (vp7 ? 7 : 9); i > 3; i--)
777  x += vp56_rac_get_prob(c, p[9 + i]) << i;
778  if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
779  x += 8;
780  } else {
781  // small_mvtree
782  const uint8_t *ps = p + 2;
783  bit = vp56_rac_get_prob(c, *ps);
784  ps += 1 + 3 * bit;
785  x += 4 * bit;
786  bit = vp56_rac_get_prob(c, *ps);
787  ps += 1 + bit;
788  x += 2 * bit;
789  x += vp56_rac_get_prob(c, *ps);
790  }
791 
792  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
793 }
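/* Note (not from the original source): in the large-magnitude branch above,
 * bits 0-2 and the high bits are coded explicitly while bit 3 is implicit:
 * if no bit above 3 is set, the magnitude must be at least 8, so 8 is added
 * unconditionally and p[12] is never read; otherwise p[12] decides bit 3.
 * The sign (via p[1]) is only coded for non-zero values. */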
794 
795 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
796 {
797  return read_mv_component(c, p, 1);
798 }
799 
800 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
801 {
802  return read_mv_component(c, p, 0);
803 }
804 
805 static av_always_inline
806 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
807 {
808  if (is_vp7)
809  return vp7_submv_prob;
810 
811  if (left == top)
812  return vp8_submv_prob[4 - !!left];
813  if (!top)
814  return vp8_submv_prob[2];
815  return vp8_submv_prob[1 - !!left];
816 }
817 
818 /**
819  * Split motion vector prediction, 16.4.
820  * @returns the number of motion vectors parsed (2, 4 or 16)
821  */
822 static av_always_inline
823 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
824  int layout, int is_vp7)
825 {
826  int part_idx;
827  int n, num;
828  VP8Macroblock *top_mb;
829  VP8Macroblock *left_mb = &mb[-1];
830  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
831  const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
832  VP56mv *top_mv;
833  VP56mv *left_mv = left_mb->bmv;
834  VP56mv *cur_mv = mb->bmv;
835 
836  if (!layout) // layout is inlined, s->mb_layout is not
837  top_mb = &mb[2];
838  else
839  top_mb = &mb[-s->mb_width - 1];
840  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
841  top_mv = top_mb->bmv;
842 
843  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
844  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
845  part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
846  else
847  part_idx = VP8_SPLITMVMODE_8x8;
848  } else {
849  part_idx = VP8_SPLITMVMODE_4x4;
850  }
851 
852  num = vp8_mbsplit_count[part_idx];
853  mbsplits_cur = vp8_mbsplits[part_idx],
854  firstidx = vp8_mbfirstidx[part_idx];
855  mb->partitioning = part_idx;
856 
857  for (n = 0; n < num; n++) {
858  int k = firstidx[n];
859  uint32_t left, above;
860  const uint8_t *submv_prob;
861 
862  if (!(k & 3))
863  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
864  else
865  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
866  if (k <= 3)
867  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
868  else
869  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
870 
871  submv_prob = get_submv_prob(left, above, is_vp7);
872 
873  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
874  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
875  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
876  mb->bmv[n].y = mb->mv.y +
877  read_mv_component(c, s->prob->mvc[0], is_vp7);
878  mb->bmv[n].x = mb->mv.x +
879  read_mv_component(c, s->prob->mvc[1], is_vp7);
880  } else {
881  AV_ZERO32(&mb->bmv[n]);
882  }
883  } else {
884  AV_WN32A(&mb->bmv[n], above);
885  }
886  } else {
887  AV_WN32A(&mb->bmv[n], left);
888  }
889  }
890 
891  return num;
892 }
893 
894 /**
895  * The vp7 reference decoder uses a padding macroblock column (added to right
896  * edge of the frame) to guard against illegal macroblock offsets. The
897  * algorithm has bugs that permit offsets to straddle the padding column.
898  * This function replicates those bugs.
899  *
900  * @param[out] edge_x macroblock x address
901  * @param[out] edge_y macroblock y address
902  *
903  * @return macroblock offset legal (boolean)
904  */
905 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
906  int xoffset, int yoffset, int boundary,
907  int *edge_x, int *edge_y)
908 {
909  int vwidth = mb_width + 1;
910  int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
911  if (new < boundary || new % vwidth == vwidth - 1)
912  return 0;
913  *edge_y = new / vwidth;
914  *edge_x = new % vwidth;
915  return 1;
916 }
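/* Worked example (not from the original source): with mb_width = 10 the
 * virtual row width is vwidth = 11, the last column being the padding column.
 * For mb_x = 9, mb_y = 0, xoffset = 1, yoffset = 0 and boundary = 0 this gives
 * new = 0 * 11 + 9 + 1 = 10, and 10 % 11 == vwidth - 1, so the offset lands
 * in the padding column and the function reports it as illegal. */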
917 
918 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
919 {
920  return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
921 }
922 
923 static av_always_inline
924 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
925  int mb_x, int mb_y, int layout)
926 {
927  VP8Macroblock *mb_edge[12];
928  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
929  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
930  int idx = CNT_ZERO;
931  VP56mv near_mv[3];
932  uint8_t cnt[3] = { 0 };
933  VP56RangeCoder *c = &s->c;
934  int i;
935 
936  AV_ZERO32(&near_mv[0]);
937  AV_ZERO32(&near_mv[1]);
938  AV_ZERO32(&near_mv[2]);
939 
940  for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
941  const VP7MVPred * pred = &vp7_mv_pred[i];
942  int edge_x, edge_y;
943 
944  if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
945  pred->yoffset, !s->profile, &edge_x, &edge_y)) {
946  VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
947  ? s->macroblocks_base + 1 + edge_x +
948  (s->mb_width + 1) * (edge_y + 1)
949  : s->macroblocks + edge_x +
950  (s->mb_height - edge_y - 1) * 2;
951  uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
952  if (mv) {
953  if (AV_RN32A(&near_mv[CNT_NEAREST])) {
954  if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
955  idx = CNT_NEAREST;
956  } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
957  if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
958  continue;
959  idx = CNT_NEAR;
960  } else {
961  AV_WN32A(&near_mv[CNT_NEAR], mv);
962  idx = CNT_NEAR;
963  }
964  } else {
965  AV_WN32A(&near_mv[CNT_NEAREST], mv);
966  idx = CNT_NEAREST;
967  }
968  } else {
969  idx = CNT_ZERO;
970  }
971  } else {
972  idx = CNT_ZERO;
973  }
974  cnt[idx] += vp7_mv_pred[i].score;
975  }
976 
977  mb->partitioning = VP8_SPLITMVMODE_NONE;
978 
979  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
980  mb->mode = VP8_MVMODE_MV;
981 
982  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
983 
984  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
985 
986  if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
987  AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
988  else
989  AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
990 
991  if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
992  mb->mode = VP8_MVMODE_SPLIT;
993  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
994  } else {
995  mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
996  mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
997  mb->bmv[0] = mb->mv;
998  }
999  } else {
1000  mb->mv = near_mv[CNT_NEAR];
1001  mb->bmv[0] = mb->mv;
1002  }
1003  } else {
1004  mb->mv = near_mv[CNT_NEAREST];
1005  mb->bmv[0] = mb->mv;
1006  }
1007  } else {
1008  mb->mode = VP8_MVMODE_ZERO;
1009  AV_ZERO32(&mb->mv);
1010  mb->bmv[0] = mb->mv;
1011  }
1012 }
1013 
1014 static av_always_inline
1015 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1016  int mb_x, int mb_y, int layout)
1017 {
1018  VP8Macroblock *mb_edge[3] = { 0 /* top */,
1019  mb - 1 /* left */,
1020  0 /* top-left */ };
1021  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1022  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1023  int idx = CNT_ZERO;
1024  int cur_sign_bias = s->sign_bias[mb->ref_frame];
1025  int8_t *sign_bias = s->sign_bias;
1026  VP56mv near_mv[4];
1027  uint8_t cnt[4] = { 0 };
1028  VP56RangeCoder *c = &s->c;
1029 
1030  if (!layout) { // layout is inlined (s->mb_layout is not)
1031  mb_edge[0] = mb + 2;
1032  mb_edge[2] = mb + 1;
1033  } else {
1034  mb_edge[0] = mb - s->mb_width - 1;
1035  mb_edge[2] = mb - s->mb_width - 2;
1036  }
1037 
1038  AV_ZERO32(&near_mv[0]);
1039  AV_ZERO32(&near_mv[1]);
1040  AV_ZERO32(&near_mv[2]);
1041 
1042  /* Process MB on top, left and top-left */
1043 #define MV_EDGE_CHECK(n) \
1044  { \
1045  VP8Macroblock *edge = mb_edge[n]; \
1046  int edge_ref = edge->ref_frame; \
1047  if (edge_ref != VP56_FRAME_CURRENT) { \
1048  uint32_t mv = AV_RN32A(&edge->mv); \
1049  if (mv) { \
1050  if (cur_sign_bias != sign_bias[edge_ref]) { \
1051  /* SWAR negate of the values in mv. */ \
1052  mv = ~mv; \
1053  mv = ((mv & 0x7fff7fff) + \
1054  0x00010001) ^ (mv & 0x80008000); \
1055  } \
1056  if (!n || mv != AV_RN32A(&near_mv[idx])) \
1057  AV_WN32A(&near_mv[++idx], mv); \
1058  cnt[idx] += 1 + (n != 2); \
1059  } else \
1060  cnt[CNT_ZERO] += 1 + (n != 2); \
1061  } \
1062  }
1063 
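/* Worked example of the SWAR negate in MV_EDGE_CHECK (not from the original
 * source, assumes little endian): a VP56mv with x = -2, y = 3 loads as the
 * word 0x0003FFFE. ~mv = 0xFFFC0001; masking with 0x7fff7fff and adding
 * 0x00010001 gives 0x7FFD0002, and XORing back the saved sign bits
 * (0x80000000) yields 0xFFFD0002, i.e. x = 2, y = -3: both 16-bit
 * components negated without unpacking. */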
1064  MV_EDGE_CHECK(0)
1065  MV_EDGE_CHECK(1)
1066  MV_EDGE_CHECK(2)
1067 
1068  mb->partitioning = VP8_SPLITMVMODE_NONE;
1069  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1070  mb->mode = VP8_MVMODE_MV;
1071 
1072  /* If we have three distinct MVs, merge first and last if they're the same */
1073  if (cnt[CNT_SPLITMV] &&
1074  AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1075  cnt[CNT_NEAREST] += 1;
1076 
1077  /* Swap near and nearest if necessary */
1078  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1079  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1080  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1081  }
1082 
1083  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1084  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1085  /* Choose the best mv out of 0,0 and the nearest mv */
1086  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1087  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1088  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1089  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1090 
1091  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1092  mb->mode = VP8_MVMODE_SPLIT;
1093  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1094  } else {
1095  mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1096  mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1097  mb->bmv[0] = mb->mv;
1098  }
1099  } else {
1100  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1101  mb->bmv[0] = mb->mv;
1102  }
1103  } else {
1104  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1105  mb->bmv[0] = mb->mv;
1106  }
1107  } else {
1108  mb->mode = VP8_MVMODE_ZERO;
1109  AV_ZERO32(&mb->mv);
1110  mb->bmv[0] = mb->mv;
1111  }
1112 }
1113 
1114 static av_always_inline
1115 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1116  int mb_x, int keyframe, int layout)
1117 {
1118  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1119 
1120  if (layout) {
1121  VP8Macroblock *mb_top = mb - s->mb_width - 1;
1122  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1123  }
1124  if (keyframe) {
1125  int x, y;
1126  uint8_t *top;
1127  uint8_t *const left = s->intra4x4_pred_mode_left;
1128  if (layout)
1129  top = mb->intra4x4_pred_mode_top;
1130  else
1131  top = s->intra4x4_pred_mode_top + 4 * mb_x;
1132  for (y = 0; y < 4; y++) {
1133  for (x = 0; x < 4; x++) {
1134  const uint8_t *ctx;
1135  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1136  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1137  left[y] = top[x] = *intra4x4;
1138  intra4x4++;
1139  }
1140  }
1141  } else {
1142  int i;
1143  for (i = 0; i < 16; i++)
1144  intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1145  vp8_pred4x4_prob_inter);
1146  }
1147 }
1148 
1149 static av_always_inline
1150 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1151  uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1152 {
1153  VP56RangeCoder *c = &s->c;
1154  const char *vp7_feature_name[] = { "q-index",
1155  "lf-delta",
1156  "partial-golden-update",
1157  "blit-pitch" };
1158  if (is_vp7) {
1159  int i;
1160  *segment = 0;
1161  for (i = 0; i < 4; i++) {
1162  if (s->feature_enabled[i]) {
1163  if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1164  int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1165  s->feature_index_prob[i]);
1166  av_log(s->avctx, AV_LOG_WARNING,
1167  "Feature %s present in macroblock (value 0x%x)\n",
1168  vp7_feature_name[i], s->feature_value[i][index]);
1169  }
1170  }
1171  }
1172  } else if (s->segmentation.update_map) {
1173  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1174  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1175  } else if (s->segmentation.enabled)
1176  *segment = ref ? *ref : *segment;
1177  mb->segment = *segment;
1178 
1179  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1180 
1181  if (s->keyframe) {
1182  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1183  vp8_pred16x16_prob_intra);
1184 
1185  if (mb->mode == MODE_I4x4) {
1186  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1187  } else {
1188  const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1189  : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1190  if (s->mb_layout)
1191  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1192  else
1193  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1194  AV_WN32A(s->intra4x4_pred_mode_left, modes);
1195  }
1196 
1197  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1198  vp8_pred8x8c_prob_intra);
1199  mb->ref_frame = VP56_FRAME_CURRENT;
1200  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1201  // inter MB, 16.2
1202  if (vp56_rac_get_prob_branchy(c, s->prob->last))
1203  mb->ref_frame =
1204  (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1205  : VP56_FRAME_GOLDEN;
1206  else
1207  mb->ref_frame = VP56_FRAME_PREVIOUS;
1208  s->ref_count[mb->ref_frame - 1]++;
1209 
1210  // motion vectors, 16.3
1211  if (is_vp7)
1212  vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1213  else
1214  vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1215  } else {
1216  // intra MB, 16.1
1217  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1218 
1219  if (mb->mode == MODE_I4x4)
1220  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1221 
1222  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1223  s->prob->pred8x8c);
1224  mb->ref_frame = VP56_FRAME_CURRENT;
1225  mb->partitioning = VP8_SPLITMVMODE_NONE;
1226  AV_ZERO32(&mb->bmv[0]);
1227  }
1228 }
1229 
1230 /**
1231  * @param r arithmetic bitstream reader context
1232  * @param block destination for block coefficients
1233  * @param probs probabilities to use when reading trees from the bitstream
1234  * @param i initial coeff index, 0 unless a separate DC block is coded
1235  * @param qmul array holding the dc/ac dequant factor at position 0/1
1236  *
1237  * @return 0 if no coeffs were decoded
1238  * otherwise, the index of the last coeff decoded plus one
1239  */
1240 static av_always_inline
1241 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1242  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1243  int i, uint8_t *token_prob, int16_t qmul[2],
1244  const uint8_t scan[16], int vp7)
1245 {
1246  VP56RangeCoder c = *r;
1247  goto skip_eob;
1248  do {
1249  int coeff;
1250 restart:
1251  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1252  break;
1253 
1254 skip_eob:
1255  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1256  if (++i == 16)
1257  break; // invalid input; blocks should end with EOB
1258  token_prob = probs[i][0];
1259  if (vp7)
1260  goto restart;
1261  goto skip_eob;
1262  }
1263 
1264  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1265  coeff = 1;
1266  token_prob = probs[i + 1][1];
1267  } else {
1268  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1269  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1270  if (coeff)
1271  coeff += vp56_rac_get_prob(&c, token_prob[5]);
1272  coeff += 2;
1273  } else {
1274  // DCT_CAT*
1275  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1276  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1277  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1278  } else { // DCT_CAT2
1279  coeff = 7;
1280  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1281  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1282  }
1283  } else { // DCT_CAT3 and up
1284  int a = vp56_rac_get_prob(&c, token_prob[8]);
1285  int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1286  int cat = (a << 1) + b;
1287  coeff = 3 + (8 << cat);
1288  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1289  }
1290  }
1291  token_prob = probs[i + 1][2];
1292  }
1293  block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1294  } while (++i < 16);
1295 
1296  *r = c;
1297  return i;
1298 }
1299 
1300 static av_always_inline
1301 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1302 {
1303  int16_t dc = block[0];
1304  int ret = 0;
1305 
1306  if (pred[1] > 3) {
1307  dc += pred[0];
1308  ret = 1;
1309  }
1310 
1311  if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1312  block[0] = pred[0] = dc;
1313  pred[1] = 0;
1314  } else {
1315  if (pred[0] == dc)
1316  pred[1]++;
1317  block[0] = pred[0] = dc;
1318  }
1319 
1320  return ret;
1321 }
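/* Reading note (not from the original source): the condition
 * !pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31 is non-zero when
 * either DC value is zero or the two differ in sign; in that case the match
 * counter pred[1] is reset, otherwise it is incremented whenever the new DC
 * equals the predicted one, and pred[1] > 3 enables the DC prediction. */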
1322 
1323 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1324  int16_t block[16],
1325  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1326  int i, uint8_t *token_prob,
1327  int16_t qmul[2],
1328  const uint8_t scan[16])
1329 {
1330  return decode_block_coeffs_internal(r, block, probs, i,
1331  token_prob, qmul, scan, IS_VP7);
1332 }
1333 
1334 #ifndef vp8_decode_block_coeffs_internal
1335 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1336  int16_t block[16],
1337  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1338  int i, uint8_t *token_prob,
1339  int16_t qmul[2])
1340 {
1341  return decode_block_coeffs_internal(r, block, probs, i,
1342  token_prob, qmul, zigzag_scan, IS_VP8);
1343 }
1344 #endif
1345 
1346 /**
1347  * @param c arithmetic bitstream reader context
1348  * @param block destination for block coefficients
1349  * @param probs probabilities to use when reading trees from the bitstream
1350  * @param i initial coeff index, 0 unless a separate DC block is coded
1351  * @param zero_nhood the initial prediction context for number of surrounding
1352  * all-zero blocks (only left/top, so 0-2)
1353  * @param qmul array holding the dc/ac dequant factor at position 0/1
1354  * @param scan scan pattern (VP7 only)
1355  *
1356  * @return 0 if no coeffs were decoded
1357  * otherwise, the index of the last coeff decoded plus one
1358  */
1359 static av_always_inline
1360 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1361  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1362  int i, int zero_nhood, int16_t qmul[2],
1363  const uint8_t scan[16], int vp7)
1364 {
1365  uint8_t *token_prob = probs[i][zero_nhood];
1366  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1367  return 0;
1368  return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1369  token_prob, qmul, scan)
1370  : vp8_decode_block_coeffs_internal(c, block, probs, i,
1371  token_prob, qmul);
1372 }
1373 
1374 static av_always_inline
1375 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1376  VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1377  int is_vp7)
1378 {
1379  int i, x, y, luma_start = 0, luma_ctx = 3;
1380  int nnz_pred, nnz, nnz_total = 0;
1381  int segment = mb->segment;
1382  int block_dc = 0;
1383 
1384  if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1385  nnz_pred = t_nnz[8] + l_nnz[8];
1386 
1387  // decode DC values and do hadamard
1388  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1389  nnz_pred, s->qmat[segment].luma_dc_qmul,
1390  zigzag_scan, is_vp7);
1391  l_nnz[8] = t_nnz[8] = !!nnz;
1392 
1393  if (is_vp7 && mb->mode > MODE_I4x4) {
1394  nnz |= inter_predict_dc(td->block_dc,
1395  s->inter_dc_pred[mb->ref_frame - 1]);
1396  }
1397 
1398  if (nnz) {
1399  nnz_total += nnz;
1400  block_dc = 1;
1401  if (nnz == 1)
1402  s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1403  else
1404  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1405  }
1406  luma_start = 1;
1407  luma_ctx = 0;
1408  }
1409 
1410  // luma blocks
1411  for (y = 0; y < 4; y++)
1412  for (x = 0; x < 4; x++) {
1413  nnz_pred = l_nnz[y] + t_nnz[x];
1414  nnz = decode_block_coeffs(c, td->block[y][x],
1415  s->prob->token[luma_ctx],
1416  luma_start, nnz_pred,
1417  s->qmat[segment].luma_qmul,
1418  s->prob[0].scan, is_vp7);
1419  /* nnz+block_dc may be one more than the actual last index,
1420  * but we don't care */
1421  td->non_zero_count_cache[y][x] = nnz + block_dc;
1422  t_nnz[x] = l_nnz[y] = !!nnz;
1423  nnz_total += nnz;
1424  }
1425 
1426  // chroma blocks
1427  // TODO: what to do about dimensions? 2nd dim for luma is x,
1428  // but for chroma it's (y<<1)|x
1429  for (i = 4; i < 6; i++)
1430  for (y = 0; y < 2; y++)
1431  for (x = 0; x < 2; x++) {
1432  nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1433  nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1434  s->prob->token[2], 0, nnz_pred,
1435  s->qmat[segment].chroma_qmul,
1436  s->prob[0].scan, is_vp7);
1437  td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1438  t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1439  nnz_total += nnz;
1440  }
1441 
1442  // if there were no coded coeffs despite the macroblock not being marked skip,
1443  // we MUST not do the inner loop filter and should not do IDCT
1444  // Since skip isn't used for bitstream prediction, just manually set it.
1445  if (!nnz_total)
1446  mb->skip = 1;
1447 }
1448 
1449 static av_always_inline
1450 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1451  uint8_t *src_cb, uint8_t *src_cr,
1452  int linesize, int uvlinesize, int simple)
1453 {
1454  AV_COPY128(top_border, src_y + 15 * linesize);
1455  if (!simple) {
1456  AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1457  AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1458  }
1459 }
1460 
1461 static av_always_inline
1462 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1463  uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1464  int mb_y, int mb_width, int simple, int xchg)
1465 {
1466  uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1467  src_y -= linesize;
1468  src_cb -= uvlinesize;
1469  src_cr -= uvlinesize;
1470 
1471 #define XCHG(a, b, xchg) \
1472  do { \
1473  if (xchg) \
1474  AV_SWAP64(b, a); \
1475  else \
1476  AV_COPY64(b, a); \
1477  } while (0)
1478 
1479  XCHG(top_border_m1 + 8, src_y - 8, xchg);
1480  XCHG(top_border, src_y, xchg);
1481  XCHG(top_border + 8, src_y + 8, 1);
1482  if (mb_x < mb_width - 1)
1483  XCHG(top_border + 32, src_y + 16, 1);
1484 
1485  // only copy chroma for normal loop filter
1486  // or to initialize the top row to 127
1487  if (!simple || !mb_y) {
1488  XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1489  XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1490  XCHG(top_border + 16, src_cb, 1);
1491  XCHG(top_border + 24, src_cr, 1);
1492  }
1493 }
1494 
1495 static av_always_inline
1496 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1497 {
1498  if (!mb_x)
1499  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1500  else
1501  return mb_y ? mode : LEFT_DC_PRED8x8;
1502 }
1503 
1504 static av_always_inline
1505 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1506 {
1507  if (!mb_x)
1508  return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1509  else
1510  return mb_y ? mode : HOR_PRED8x8;
1511 }
1512 
1513 static av_always_inline
1514 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1515 {
1516  switch (mode) {
1517  case DC_PRED8x8:
1518  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1519  case VERT_PRED8x8:
1520  return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1521  case HOR_PRED8x8:
1522  return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1523  case PLANE_PRED8x8: /* TM */
1524  return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1525  }
1526  return mode;
1527 }
1528 
1529 static av_always_inline
1530 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1531 {
1532  if (!mb_x) {
1533  return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1534  } else {
1535  return mb_y ? mode : HOR_VP8_PRED;
1536  }
1537 }
1538 
1539 static av_always_inline
1540 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1541  int *copy_buf, int vp7)
1542 {
1543  switch (mode) {
1544  case VERT_PRED:
1545  if (!mb_x && mb_y) {
1546  *copy_buf = 1;
1547  return mode;
1548  }
1549  /* fall-through */
1550  case DIAG_DOWN_LEFT_PRED:
1551  case VERT_LEFT_PRED:
1552  return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1553  case HOR_PRED:
1554  if (!mb_y) {
1555  *copy_buf = 1;
1556  return mode;
1557  }
1558  /* fall-through */
1559  case HOR_UP_PRED:
1560  return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1561  case TM_VP8_PRED:
1562  return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1563  case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1564  * as 16x16/8x8 DC */
1565  case DIAG_DOWN_RIGHT_PRED:
1566  case VERT_RIGHT_PRED:
1567  case HOR_DOWN_PRED:
1568  if (!mb_y || !mb_x)
1569  *copy_buf = 1;
1570  return mode;
1571  }
1572  return mode;
1573 }
1574 
1575 static av_always_inline
1576 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1577  VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1578 {
1579  int x, y, mode, nnz;
1580  uint32_t tr;
1581 
1582  /* for the first row, we need to run xchg_mb_border to init the top edge
1583  * to 127; otherwise, skip it if we aren't going to deblock */
1584  if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1585  xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1586  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1587  s->filter.simple, 1);
1588 
1589  if (mb->mode < MODE_I4x4) {
1590  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1591  s->hpc.pred16x16[mode](dst[0], s->linesize);
1592  } else {
1593  uint8_t *ptr = dst[0];
1594  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1595  const uint8_t lo = is_vp7 ? 128 : 127;
1596  const uint8_t hi = is_vp7 ? 128 : 129;
1597  uint8_t tr_top[4] = { lo, lo, lo, lo };
1598 
1599  // all blocks on the right edge of the macroblock use the bottom edge of
1600  // the top macroblock for their topright edge
1601  uint8_t *tr_right = ptr - s->linesize + 16;
1602 
1603  // if we're on the right edge of the frame, said edge is extended
1604  // from the top macroblock
1605  if (mb_y && mb_x == s->mb_width - 1) {
1606  tr = tr_right[-1] * 0x01010101u;
1607  tr_right = (uint8_t *) &tr;
1608  }
1609 
1610  if (mb->skip)
1611  AV_ZERO128(td->non_zero_count_cache);
1612 
1613  for (y = 0; y < 4; y++) {
1614  uint8_t *topright = ptr + 4 - s->linesize;
1615  for (x = 0; x < 4; x++) {
1616  int copy = 0, linesize = s->linesize;
1617  uint8_t *dst = ptr + 4 * x;
1618  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1619 
1620  if ((y == 0 || x == 3) && mb_y == 0) {
1621  topright = tr_top;
1622  } else if (x == 3)
1623  topright = tr_right;
1624 
1625  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1626  mb_y + y, &copy, is_vp7);
1627  if (copy) {
1628  dst = copy_dst + 12;
1629  linesize = 8;
1630  if (!(mb_y + y)) {
1631  copy_dst[3] = lo;
1632  AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1633  } else {
1634  AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1635  if (!(mb_x + x)) {
1636  copy_dst[3] = hi;
1637  } else {
1638  copy_dst[3] = ptr[4 * x - s->linesize - 1];
1639  }
1640  }
1641  if (!(mb_x + x)) {
1642  copy_dst[11] =
1643  copy_dst[19] =
1644  copy_dst[27] =
1645  copy_dst[35] = hi;
1646  } else {
1647  copy_dst[11] = ptr[4 * x - 1];
1648  copy_dst[19] = ptr[4 * x + s->linesize - 1];
1649  copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1650  copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1651  }
1652  }
1653  s->hpc.pred4x4[mode](dst, topright, linesize);
1654  if (copy) {
1655  AV_COPY32(ptr + 4 * x, copy_dst + 12);
1656  AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1657  AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1658  AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1659  }
1660 
1661  nnz = td->non_zero_count_cache[y][x];
1662  if (nnz) {
1663  if (nnz == 1)
1664  s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1665  td->block[y][x], s->linesize);
1666  else
1667  s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1668  td->block[y][x], s->linesize);
1669  }
1670  topright += 4;
1671  }
1672 
1673  ptr += 4 * s->linesize;
1674  intra4x4 += 4;
1675  }
1676  }
1677 
1678  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1679  mb_x, mb_y, is_vp7);
1680  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1681  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1682 
1683  if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1684  xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1685  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1686  s->filter.simple, 0);
1687 }
1688 
1689 static const uint8_t subpel_idx[3][8] = {
1690  { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1691  // also function pointer index
1692  { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1693  { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1694 };
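/* Worked example (not from the original source): for a luma MV with
 * mv->x == 5, mx = (5 << 1) & 7 = 2, so subpel_idx[0][2] = 2 extra pixels are
 * needed left of the block, subpel_idx[2][2] = 3 to the right and
 * subpel_idx[1][2] = 5 in total -- the footprint of the 6-tap filter. */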
1695 
1696 /**
1697  * luma MC function
1698  *
1699  * @param s VP8 decoding context
1700  * @param dst target buffer for block data at block position
1701  * @param ref reference picture buffer at origin (0, 0)
1702  * @param mv motion vector (relative to block position) to get pixel data from
1703  * @param x_off horizontal position of block from origin (0, 0)
1704  * @param y_off vertical position of block from origin (0, 0)
1705  * @param block_w width of block (16, 8 or 4)
1706  * @param block_h height of block (always same as block_w)
1707  * @param width width of src/dst plane data
1708  * @param height height of src/dst plane data
1709  * @param linesize size of a single line of plane data, including padding
1710  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1711  */
1712 static av_always_inline
1713 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1714  ThreadFrame *ref, const VP56mv *mv,
1715  int x_off, int y_off, int block_w, int block_h,
1716  int width, int height, ptrdiff_t linesize,
1717  vp8_mc_func mc_func[3][3])
1718 {
1719  uint8_t *src = ref->f->data[0];
1720 
1721  if (AV_RN32A(mv)) {
1722  int src_linesize = linesize;
1723 
1724  int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1725  int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1726 
1727  x_off += mv->x >> 2;
1728  y_off += mv->y >> 2;
1729 
1730  // edge emulation
1731  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1732  src += y_off * linesize + x_off;
1733  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1734  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1735  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1736  src - my_idx * linesize - mx_idx,
1737  EDGE_EMU_LINESIZE, linesize,
1738  block_w + subpel_idx[1][mx],
1739  block_h + subpel_idx[1][my],
1740  x_off - mx_idx, y_off - my_idx,
1741  width, height);
1742  src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1743  src_linesize = EDGE_EMU_LINESIZE;
1744  }
1745  mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1746  } else {
1747  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1748  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1749  linesize, block_h, 0, 0);
1750  }
1751 }
1752 
1753 /**
1754  * chroma MC function
1755  *
1756  * @param s VP8 decoding context
1757  * @param dst1 target buffer for block data at block position (U plane)
1758  * @param dst2 target buffer for block data at block position (V plane)
1759  * @param ref reference picture buffer at origin (0, 0)
1760  * @param mv motion vector (relative to block position) to get pixel data from
1761  * @param x_off horizontal position of block from origin (0, 0)
1762  * @param y_off vertical position of block from origin (0, 0)
1763  * @param block_w width of block (16, 8 or 4)
1764  * @param block_h height of block (always same as block_w)
1765  * @param width width of src/dst plane data
1766  * @param height height of src/dst plane data
1767  * @param linesize size of a single line of plane data, including padding
1768  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1769  */
1770 static av_always_inline
1771 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1772  uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1773  int x_off, int y_off, int block_w, int block_h,
1774  int width, int height, ptrdiff_t linesize,
1775  vp8_mc_func mc_func[3][3])
1776 {
1777  uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1778 
1779  if (AV_RN32A(mv)) {
1780  int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1781  int my = mv->y & 7, my_idx = subpel_idx[0][my];
1782 
1783  x_off += mv->x >> 3;
1784  y_off += mv->y >> 3;
1785 
1786  // edge emulation
1787  src1 += y_off * linesize + x_off;
1788  src2 += y_off * linesize + x_off;
1789  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1790  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1791  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1792  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1793  src1 - my_idx * linesize - mx_idx,
1794  EDGE_EMU_LINESIZE, linesize,
1795  block_w + subpel_idx[1][mx],
1796  block_h + subpel_idx[1][my],
1797  x_off - mx_idx, y_off - my_idx, width, height);
1798  src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1799  mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1800 
1801  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1802  src2 - my_idx * linesize - mx_idx,
1803  EDGE_EMU_LINESIZE, linesize,
1804  block_w + subpel_idx[1][mx],
1805  block_h + subpel_idx[1][my],
1806  x_off - mx_idx, y_off - my_idx, width, height);
1807  src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1808  mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1809  } else {
1810  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1811  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1812  }
1813  } else {
1814  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1815  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1816  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1817  }
1818 }
1819 
1820 static av_always_inline
1821 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1822  ThreadFrame *ref_frame, int x_off, int y_off,
1823  int bx_off, int by_off, int block_w, int block_h,
1824  int width, int height, VP56mv *mv)
1825 {
1826  VP56mv uvmv = *mv;
1827 
1828  /* Y */
1829  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1830  ref_frame, mv, x_off + bx_off, y_off + by_off,
1831  block_w, block_h, width, height, s->linesize,
1832  s->put_pixels_tab[block_w == 8]);
1833 
1834  /* U/V */
1835  if (s->profile == 3) {
1836  /* this block only applies to VP8; it is safe to check
1837  * only the profile, as VP7 profile <= 1 */
1838  uvmv.x &= ~7;
1839  uvmv.y &= ~7;
1840  }
1841  x_off >>= 1;
1842  y_off >>= 1;
1843  bx_off >>= 1;
1844  by_off >>= 1;
1845  width >>= 1;
1846  height >>= 1;
1847  block_w >>= 1;
1848  block_h >>= 1;
1849  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1850  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1851  &uvmv, x_off + bx_off, y_off + by_off,
1852  block_w, block_h, width, height, s->uvlinesize,
1853  s->put_pixels_tab[1 + (block_w == 4)]);
1854 }
1855 
1856 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1857  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1858 static av_always_inline
1859 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1860  int mb_xy, int ref)
1861 {
1862  /* Don't prefetch refs that haven't been used very often this frame. */
1863  if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1864  int x_off = mb_x << 4, y_off = mb_y << 4;
1865  int mx = (mb->mv.x >> 2) + x_off + 8;
1866  int my = (mb->mv.y >> 2) + y_off;
1867  uint8_t **src = s->framep[ref]->tf.f->data;
1868  int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1869  /* For threading, a ff_thread_await_progress here might be useful, but
1870  * it actually slows down the decoder. Since a bad prefetch doesn't
1871  * generate bad decoder output, we don't run it here. */
1872  s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1873  off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1874  s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1875  }
1876 }
1877 
1878 /**
1879  * Apply motion vectors to prediction buffer, chapter 18.
1880  */
1881 static av_always_inline
1882 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1883  VP8Macroblock *mb, int mb_x, int mb_y)
1884 {
1885  int x_off = mb_x << 4, y_off = mb_y << 4;
1886  int width = 16 * s->mb_width, height = 16 * s->mb_height;
1887  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1888  VP56mv *bmv = mb->bmv;
1889 
1890  switch (mb->partitioning) {
1891  case VP8_SPLITMVMODE_NONE:
1892  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1893  0, 0, 16, 16, width, height, &mb->mv);
1894  break;
1895  case VP8_SPLITMVMODE_4x4: {
1896  int x, y;
1897  VP56mv uvmv;
1898 
1899  /* Y */
1900  for (y = 0; y < 4; y++) {
1901  for (x = 0; x < 4; x++) {
1902  vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1903  ref, &bmv[4 * y + x],
1904  4 * x + x_off, 4 * y + y_off, 4, 4,
1905  width, height, s->linesize,
1906  s->put_pixels_tab[2]);
1907  }
1908  }
1909 
1910  /* U/V */
1911  x_off >>= 1;
1912  y_off >>= 1;
1913  width >>= 1;
1914  height >>= 1;
1915  for (y = 0; y < 2; y++) {
1916  for (x = 0; x < 2; x++) {
1917  uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1918  mb->bmv[2 * y * 4 + 2 * x + 1].x +
1919  mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1920  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1921  uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1922  mb->bmv[2 * y * 4 + 2 * x + 1].y +
1923  mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1924  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1925  uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1926  uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1927  if (s->profile == 3) {
1928  uvmv.x &= ~7;
1929  uvmv.y &= ~7;
1930  }
1931  vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1932  dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1933  &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1934  width, height, s->uvlinesize,
1935  s->put_pixels_tab[2]);
1936  }
1937  }
1938  break;
1939  }
1940  case VP8_SPLITMVMODE_16x8:
1941  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1942  0, 0, 16, 8, width, height, &bmv[0]);
1943  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1944  0, 8, 16, 8, width, height, &bmv[1]);
1945  break;
1946  case VP8_SPLITMVMODE_8x16:
1947  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1948  0, 0, 8, 16, width, height, &bmv[0]);
1949  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1950  8, 0, 8, 16, width, height, &bmv[1]);
1951  break;
1952  case VP8_SPLITMVMODE_8x8:
1953  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954  0, 0, 8, 8, width, height, &bmv[0]);
1955  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956  8, 0, 8, 8, width, height, &bmv[1]);
1957  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1958  0, 8, 8, 8, width, height, &bmv[2]);
1959  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960  8, 8, 8, 8, width, height, &bmv[3]);
1961  break;
1962  }
1963 }
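/* In SPLITMV 4x4 mode each 4x4 chroma block uses the sum of the four
 * co-located luma MVs divided by 4; the "+ 2 + FF_SIGNBIT()" bias (which is
 * -1 for negative values) makes the >> 2 round symmetrically around zero. */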
1964 
1965 static av_always_inline
1966 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1967 {
1968  int x, y, ch;
1969 
1970  if (mb->mode != MODE_I4x4) {
1971  uint8_t *y_dst = dst[0];
1972  for (y = 0; y < 4; y++) {
1973  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1974  if (nnz4) {
1975  if (nnz4 & ~0x01010101) {
1976  for (x = 0; x < 4; x++) {
1977  if ((uint8_t) nnz4 == 1)
1978  s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1979  td->block[y][x],
1980  s->linesize);
1981  else if ((uint8_t) nnz4 > 1)
1982  s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1983  td->block[y][x],
1984  s->linesize);
1985  nnz4 >>= 8;
1986  if (!nnz4)
1987  break;
1988  }
1989  } else {
1990  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1991  }
1992  }
1993  y_dst += 4 * s->linesize;
1994  }
1995  }
1996 
1997  for (ch = 0; ch < 2; ch++) {
1998  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1999  if (nnz4) {
2000  uint8_t *ch_dst = dst[1 + ch];
2001  if (nnz4 & ~0x01010101) {
2002  for (y = 0; y < 2; y++) {
2003  for (x = 0; x < 2; x++) {
2004  if ((uint8_t) nnz4 == 1)
2005  s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2006  td->block[4 + ch][(y << 1) + x],
2007  s->uvlinesize);
2008  else if ((uint8_t) nnz4 > 1)
2009  s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2010  td->block[4 + ch][(y << 1) + x],
2011  s->uvlinesize);
2012  nnz4 >>= 8;
2013  if (!nnz4)
2014  goto chroma_idct_end;
2015  }
2016  ch_dst += 4 * s->uvlinesize;
2017  }
2018  } else {
2019  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2020  }
2021  }
2022 chroma_idct_end:
2023  ;
2024  }
2025 }
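/* AV_RL32() packs the four per-block non-zero counts of one row into one
 * 32-bit word, low byte first. If no byte exceeds 1 (nnz4 & ~0x01010101 is
 * zero), every block carries at most a DC coefficient and the combined
 * dc_add4y/dc_add4uv path is taken; otherwise each block dispatches to
 * idct_dc_add() (count == 1) or the full idct_add(). */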
2026 
2027 static av_always_inline
2028 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2029  VP8FilterStrength *f, int is_vp7)
2030 {
2031  int interior_limit, filter_level;
2032 
2033  if (s->segmentation.enabled) {
2034  filter_level = s->segmentation.filter_level[mb->segment];
2035  if (!s->segmentation.absolute_vals)
2036  filter_level += s->filter.level;
2037  } else
2038  filter_level = s->filter.level;
2039 
2040  if (s->lf_delta.enabled) {
2041  filter_level += s->lf_delta.ref[mb->ref_frame];
2042  filter_level += s->lf_delta.mode[mb->mode];
2043  }
2044 
2045  filter_level = av_clip_uintp2(filter_level, 6);
2046 
2047  interior_limit = filter_level;
2048  if (s->filter.sharpness) {
2049  interior_limit >>= (s->filter.sharpness + 3) >> 2;
2050  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2051  }
2052  interior_limit = FFMAX(interior_limit, 1);
2053 
2054  f->filter_level = filter_level;
2055  f->inner_limit = interior_limit;
2056  f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2057  mb->mode == VP8_MVMODE_SPLIT;
2058 }
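/* The per-mb level is the segment (or frame) level plus the loop-filter
 * deltas, clamped to the 6-bit range 0..63 by av_clip_uintp2(). The interior
 * (inner-edge) limit is derived from it, reduced according to
 * filter.sharpness and kept at least 1. */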
2059 
2060 static av_always_inline
2061 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2062  int mb_x, int mb_y, int is_vp7)
2063 {
2064  int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2065  int filter_level = f->filter_level;
2066  int inner_limit = f->inner_limit;
2067  int inner_filter = f->inner_filter;
2068  int linesize = s->linesize;
2069  int uvlinesize = s->uvlinesize;
2070  static const uint8_t hev_thresh_lut[2][64] = {
2071  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2072  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2073  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2074  3, 3, 3, 3 },
2075  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2076  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2077  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2078  2, 2, 2, 2 }
2079  };
2080 
2081  if (!filter_level)
2082  return;
2083 
2084  if (is_vp7) {
2085  bedge_lim_y = filter_level;
2086  bedge_lim_uv = filter_level * 2;
2087  mbedge_lim = filter_level + 2;
2088  } else {
2089  bedge_lim_y =
2090  bedge_lim_uv = filter_level * 2 + inner_limit;
2091  mbedge_lim = bedge_lim_y + 4;
2092  }
2093 
2094  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2095 
2096  if (mb_x) {
2097  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2098  mbedge_lim, inner_limit, hev_thresh);
2099  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2100  mbedge_lim, inner_limit, hev_thresh);
2101  }
2102 
2103 #define H_LOOP_FILTER_16Y_INNER(cond) \
2104  if (cond && inner_filter) { \
2105  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2106  bedge_lim_y, inner_limit, \
2107  hev_thresh); \
2108  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2109  bedge_lim_y, inner_limit, \
2110  hev_thresh); \
2111  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2112  bedge_lim_y, inner_limit, \
2113  hev_thresh); \
2114  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2115  uvlinesize, bedge_lim_uv, \
2116  inner_limit, hev_thresh); \
2117  }
2118 
2119  H_LOOP_FILTER_16Y_INNER(!is_vp7)
2120 
2121  if (mb_y) {
2122  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2123  mbedge_lim, inner_limit, hev_thresh);
2124  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2125  mbedge_lim, inner_limit, hev_thresh);
2126  }
2127 
2128  if (inner_filter) {
2129  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2130  linesize, bedge_lim_y,
2131  inner_limit, hev_thresh);
2132  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2133  linesize, bedge_lim_y,
2134  inner_limit, hev_thresh);
2135  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2136  linesize, bedge_lim_y,
2137  inner_limit, hev_thresh);
2138  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2139  dst[2] + 4 * uvlinesize,
2140  uvlinesize, bedge_lim_uv,
2141  inner_limit, hev_thresh);
2142  }
2143 
2144  H_LOOP_FILTER_16Y_INNER(is_vp7)
2145 }
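/* hev_thresh_lut is indexed by [keyframe][filter_level]; keyframes use the
 * lower thresholds. Note the two H_LOOP_FILTER_16Y_INNER invocations: the
 * inner horizontal-direction filters run before the vertical filters on VP8
 * (!is_vp7) and after them on VP7 (is_vp7). */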
2146 
2147 static av_always_inline
2148 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2149  int mb_x, int mb_y)
2150 {
2151  int mbedge_lim, bedge_lim;
2152  int filter_level = f->filter_level;
2153  int inner_limit = f->inner_limit;
2154  int inner_filter = f->inner_filter;
2155  int linesize = s->linesize;
2156 
2157  if (!filter_level)
2158  return;
2159 
2160  bedge_lim = 2 * filter_level + inner_limit;
2161  mbedge_lim = bedge_lim + 4;
2162 
2163  if (mb_x)
2164  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2165  if (inner_filter) {
2166  s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2167  s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2168  s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2169  }
2170 
2171  if (mb_y)
2172  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2173  if (inner_filter) {
2174  s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2175  s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2176  s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2177  }
2178 }
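/* The simple loop filter touches only the luma plane and uses a single edge
 * limit per edge type, with no high-edge-variance threshold. */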
2179 
2180 #define MARGIN (16 << 2)
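/* MVs are stored in quarter-pel units (64 per 16-pixel macroblock), so
 * MARGIN (16 << 2) lets prediction reach at most 16 pixels outside the
 * frame; mv_min/mv_max are slid by 64 per macroblock in the loops below. */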
2181 static av_always_inline
2182 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2183  VP8Frame *prev_frame, int is_vp7)
2184 {
2185  VP8Context *s = avctx->priv_data;
2186  int mb_x, mb_y;
2187 
2188  s->mv_min.y = -MARGIN;
2189  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2190  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2191  VP8Macroblock *mb = s->macroblocks_base +
2192  ((s->mb_width + 1) * (mb_y + 1) + 1);
2193  int mb_xy = mb_y * s->mb_width;
2194 
2195  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2196 
2197  s->mv_min.x = -MARGIN;
2198  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2199  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2200  if (mb_y == 0)
2201  AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2202  DC_PRED * 0x01010101);
2203  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2204  prev_frame && prev_frame->seg_map ?
2205  prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2206  s->mv_min.x -= 64;
2207  s->mv_max.x -= 64;
2208  }
2209  s->mv_min.y -= 64;
2210  s->mv_max.y -= 64;
2211  }
2212 }
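/* With mb_layout == 1 (the layout used for frame threading) all macroblock
 * modes and motion vectors are parsed here in a single pass before any row
 * is reconstructed. */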
2213 
2214 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2215  VP8Frame *prev_frame)
2216 {
2217  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2218 }
2219 
2220 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2221  VP8Frame *prev_frame)
2222 {
2223  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2224 }
2225 
2226 #if HAVE_THREADS
2227 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2228  do { \
2229  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2230  if (otd->thread_mb_pos < tmp) { \
2231  pthread_mutex_lock(&otd->lock); \
2232  td->wait_mb_pos = tmp; \
2233  do { \
2234  if (otd->thread_mb_pos >= tmp) \
2235  break; \
2236  pthread_cond_wait(&otd->cond, &otd->lock); \
2237  } while (1); \
2238  td->wait_mb_pos = INT_MAX; \
2239  pthread_mutex_unlock(&otd->lock); \
2240  } \
2241  } while (0)
2242 
2243 #define update_pos(td, mb_y, mb_x) \
2244  do { \
2245  int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2246  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2247  (num_jobs > 1); \
2248  int is_null = !next_td || !prev_td; \
2249  int pos_check = (is_null) ? 1 \
2250  : (next_td != td && \
2251  pos >= next_td->wait_mb_pos) || \
2252  (prev_td != td && \
2253  pos >= prev_td->wait_mb_pos); \
2254  td->thread_mb_pos = pos; \
2255  if (sliced_threading && pos_check) { \
2256  pthread_mutex_lock(&td->lock); \
2257  pthread_cond_broadcast(&td->cond); \
2258  pthread_mutex_unlock(&td->lock); \
2259  } \
2260  } while (0)
2261 #else
2262 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2263 #define update_pos(td, mb_y, mb_x)
2264 #endif
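/* Slice-thread synchronization: progress is published as
 * (mb_y << 16) | mb_x. check_thread_pos() blocks on the other thread's
 * condition variable until its position passes the requested one;
 * update_pos() stores the new position and wakes any such waiter. */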
2265 
2266 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2267  int jobnr, int threadnr, int is_vp7)
2268 {
2269  VP8Context *s = avctx->priv_data;
2270  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2271  int mb_y = td->thread_mb_pos >> 16;
2272  int mb_x, mb_xy = mb_y * s->mb_width;
2273  int num_jobs = s->num_jobs;
2274  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2275  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2276  VP8Macroblock *mb;
2277  uint8_t *dst[3] = {
2278  curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2279  curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2280  curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2281  };
2282  if (mb_y == 0)
2283  prev_td = td;
2284  else
2285  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2286  if (mb_y == s->mb_height - 1)
2287  next_td = td;
2288  else
2289  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2290  if (s->mb_layout == 1)
2291  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2292  else {
2293  // Make sure the previous frame has read its segmentation map,
2294  // if we re-use the same map.
2295  if (prev_frame && s->segmentation.enabled &&
2296  !s->segmentation.update_map)
2297  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2298  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2299  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2300  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2301  }
2302 
2303  if (!is_vp7 || mb_y == 0)
2304  memset(td->left_nnz, 0, sizeof(td->left_nnz));
2305 
2306  s->mv_min.x = -MARGIN;
2307  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2308 
2309  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2310  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2311  if (prev_td != td) {
2312  if (threadnr != 0) {
2313  check_thread_pos(td, prev_td,
2314  mb_x + (is_vp7 ? 2 : 1),
2315  mb_y - (is_vp7 ? 2 : 1));
2316  } else {
2317  check_thread_pos(td, prev_td,
2318  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2319  mb_y - (is_vp7 ? 2 : 1));
2320  }
2321  }
2322 
2323  s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2324  s->linesize, 4);
2325  s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2326  dst[2] - dst[1], 2);
2327 
2328  if (!s->mb_layout)
2329  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2330  prev_frame && prev_frame->seg_map ?
2331  prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2332 
2333  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2334 
2335  if (!mb->skip)
2336  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2337 
2338  if (mb->mode <= MODE_I4x4)
2339  intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2340  else
2341  inter_predict(s, td, dst, mb, mb_x, mb_y);
2342 
2343  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2344 
2345  if (!mb->skip) {
2346  idct_mb(s, td, dst, mb);
2347  } else {
2348  AV_ZERO64(td->left_nnz);
2349  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2350 
2351  /* Reset the DC block predictors that would exist
2352  * if the mb had coefficients */
2353  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2354  td->left_nnz[8] = 0;
2355  s->top_nnz[mb_x][8] = 0;
2356  }
2357  }
2358 
2359  if (s->deblock_filter)
2360  filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2361 
2362  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2363  if (s->filter.simple)
2364  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2365  NULL, NULL, s->linesize, 0, 1);
2366  else
2367  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2368  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2369  }
2370 
2371  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2372 
2373  dst[0] += 16;
2374  dst[1] += 8;
2375  dst[2] += 8;
2376  s->mv_min.x -= 64;
2377  s->mv_max.x -= 64;
2378 
2379  if (mb_x == s->mb_width + 1) {
2380  update_pos(td, mb_y, s->mb_width + 3);
2381  } else {
2382  update_pos(td, mb_y, mb_x);
2383  }
2384  }
2385 }
2386 
2387 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2388  int jobnr, int threadnr)
2389 {
2390  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2391 }
2392 
2393 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2394  int jobnr, int threadnr)
2395 {
2396  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2397 }
2398 
2399 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2400  int jobnr, int threadnr, int is_vp7)
2401 {
2402  VP8Context *s = avctx->priv_data;
2403  VP8ThreadData *td = &s->thread_data[threadnr];
2404  int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2405  AVFrame *curframe = s->curframe->tf.f;
2406  VP8Macroblock *mb;
2407  VP8ThreadData *prev_td, *next_td;
2408  uint8_t *dst[3] = {
2409  curframe->data[0] + 16 * mb_y * s->linesize,
2410  curframe->data[1] + 8 * mb_y * s->uvlinesize,
2411  curframe->data[2] + 8 * mb_y * s->uvlinesize
2412  };
2413 
2414  if (s->mb_layout == 1)
2415  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2416  else
2417  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2418 
2419  if (mb_y == 0)
2420  prev_td = td;
2421  else
2422  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2423  if (mb_y == s->mb_height - 1)
2424  next_td = td;
2425  else
2426  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2427 
2428  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2429  VP8FilterStrength *f = &td->filter_strength[mb_x];
2430  if (prev_td != td)
2431  check_thread_pos(td, prev_td,
2432  (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2433  if (next_td != td)
2434  if (next_td != &s->thread_data[0])
2435  check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2436 
2437  if (num_jobs == 1) {
2438  if (s->filter.simple)
2439  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2440  NULL, NULL, s->linesize, 0, 1);
2441  else
2442  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2443  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2444  }
2445 
2446  if (s->filter.simple)
2447  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2448  else
2449  filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2450  dst[0] += 16;
2451  dst[1] += 8;
2452  dst[2] += 8;
2453 
2454  update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2455  }
2456 }
2457 
2458 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2459  int jobnr, int threadnr)
2460 {
2461  filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2462 }
2463 
2464 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2465  int jobnr, int threadnr)
2466 {
2467  filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2468 }
2469 
2470 static av_always_inline
2471 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2472  int threadnr, int is_vp7)
2473 {
2474  VP8Context *s = avctx->priv_data;
2475  VP8ThreadData *td = &s->thread_data[jobnr];
2476  VP8ThreadData *next_td = NULL, *prev_td = NULL;
2477  VP8Frame *curframe = s->curframe;
2478  int mb_y, num_jobs = s->num_jobs;
2479 
2480  td->thread_nr = threadnr;
2481  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2482  if (mb_y >= s->mb_height)
2483  break;
2484  td->thread_mb_pos = mb_y << 16;
2485  s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2486  if (s->deblock_filter)
2487  s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2488  update_pos(td, mb_y, INT_MAX & 0xFFFF);
2489 
2490  s->mv_min.y -= 64;
2491  s->mv_max.y -= 64;
2492 
2493  if (avctx->active_thread_type == FF_THREAD_FRAME)
2494  ff_thread_report_progress(&curframe->tf, mb_y, 0);
2495  }
2496 
2497  return 0;
2498 }
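/* Rows are dealt out round-robin: job n handles rows n, n + num_jobs, ...
 * Under frame threading (a single job) each finished row is reported so a
 * consuming frame thread can start on it immediately. */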
2499 
2500 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2501  int jobnr, int threadnr)
2502 {
2503  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2504 }
2505 
2506 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2507  int jobnr, int threadnr)
2508 {
2509  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2510 }
2511 
2512 
2513 static av_always_inline
2514 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2515  AVPacket *avpkt, int is_vp7)
2516 {
2517  VP8Context *s = avctx->priv_data;
2518  int ret, i, referenced, num_jobs;
2519  enum AVDiscard skip_thresh;
2520  VP8Frame *av_uninit(curframe), *prev_frame;
2521 
2522  if (is_vp7)
2523  ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2524  else
2525  ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2526 
2527  if (ret < 0)
2528  goto err;
2529 
2530  prev_frame = s->framep[VP56_FRAME_CURRENT];
2531 
2532  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2533  s->update_altref == VP56_FRAME_CURRENT;
2534 
2535  skip_thresh = !referenced ? AVDISCARD_NONREF
2536  : !s->keyframe ? AVDISCARD_NONKEY
2537  : AVDISCARD_ALL;
2538 
2539  if (avctx->skip_frame >= skip_thresh) {
2540  s->invisible = 1;
2541  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2542  goto skip_decode;
2543  }
2544  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2545 
2546  // release no longer referenced frames
2547  for (i = 0; i < 5; i++)
2548  if (s->frames[i].tf.f->data[0] &&
2549  &s->frames[i] != prev_frame &&
2550  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2551  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2552  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2553  vp8_release_frame(s, &s->frames[i]);
2554 
2555  curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2556 
2557  if (!s->colorspace)
2558  avctx->colorspace = AVCOL_SPC_BT470BG;
2559  if (s->fullrange)
2560  avctx->color_range = AVCOL_RANGE_JPEG;
2561  else
2562  avctx->color_range = AVCOL_RANGE_MPEG;
2563 
2564  /* Given that arithmetic probabilities are updated every frame, it's quite
2565  * likely that the values we have on a random interframe are complete
2566  * junk if we didn't start decoding on a keyframe. So just don't display
2567  * anything rather than junk. */
2568  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2569  !s->framep[VP56_FRAME_GOLDEN] ||
2570  !s->framep[VP56_FRAME_GOLDEN2])) {
2571  av_log(avctx, AV_LOG_WARNING,
2572  "Discarding interframe without a prior keyframe!\n");
2573  ret = AVERROR_INVALIDDATA;
2574  goto err;
2575  }
2576 
2577  curframe->tf.f->key_frame = s->keyframe;
2578  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2579  : AV_PICTURE_TYPE_P;
2580  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2581  goto err;
2582 
2583  // check if golden and altref are swapped
2584  if (s->update_altref != VP56_FRAME_NONE)
2585  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2586  else
2587  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2588 
2589  if (s->update_golden != VP56_FRAME_NONE)
2590  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2591  else
2592  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2593 
2594  if (s->update_last)
2595  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2596  else
2597  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2598 
2599  s->next_framep[VP56_FRAME_CURRENT] = curframe;
2600 
2601  if (avctx->codec->update_thread_context)
2602  ff_thread_finish_setup(avctx);
2603 
2604  s->linesize = curframe->tf.f->linesize[0];
2605  s->uvlinesize = curframe->tf.f->linesize[1];
2606 
2607  memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2608  /* Zero macroblock structures for top/top-left prediction
2609  * from outside the frame. */
2610  if (!s->mb_layout)
2611  memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2612  (s->mb_width + 1) * sizeof(*s->macroblocks));
2613  if (!s->mb_layout && s->keyframe)
2614  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2615 
2616  memset(s->ref_count, 0, sizeof(s->ref_count));
2617 
2618  if (s->mb_layout == 1) {
2619  // Make sure the previous frame has read its segmentation map,
2620  // if we re-use the same map.
2621  if (prev_frame && s->segmentation.enabled &&
2622  !s->segmentation.update_map)
2623  ff_thread_await_progress(&prev_frame->tf, 1, 0);
2624  if (is_vp7)
2625  vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2626  else
2627  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2628  }
2629 
2630  if (avctx->active_thread_type == FF_THREAD_FRAME)
2631  num_jobs = 1;
2632  else
2633  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2634  s->num_jobs = num_jobs;
2635  s->curframe = curframe;
2636  s->prev_frame = prev_frame;
2637  s->mv_min.y = -MARGIN;
2638  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2639  for (i = 0; i < MAX_THREADS; i++) {
2640  s->thread_data[i].thread_mb_pos = 0;
2641  s->thread_data[i].wait_mb_pos = INT_MAX;
2642  }
2643  if (is_vp7)
2644  avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2645  num_jobs);
2646  else
2647  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2648  num_jobs);
2649 
2650  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2651  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2652 
2653 skip_decode:
2654  // if future frames don't use the updated probabilities,
2655  // reset them to the values we saved
2656  if (!s->update_probabilities)
2657  s->prob[0] = s->prob[1];
2658 
2659  if (!s->invisible) {
2660  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2661  return ret;
2662  *got_frame = 1;
2663  }
2664 
2665  return avpkt->size;
2666 err:
2667  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2668  return ret;
2669 }
2670 
2671 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2672  AVPacket *avpkt)
2673 {
2674  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2675 }
2676 
2677 #if CONFIG_VP7_DECODER
2678 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2679  AVPacket *avpkt)
2680 {
2681  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2682 }
2683 #endif /* CONFIG_VP7_DECODER */
2684 
2685 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2686 {
2687  VP8Context *s = avctx->priv_data;
2688  int i;
2689 
2690  vp8_decode_flush_impl(avctx, 1);
2691  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2692  av_frame_free(&s->frames[i].tf.f);
2693 
2694  return 0;
2695 }
2696 
2697 static av_cold int vp8_init_frames(VP8Context *s)
2698 {
2699  int i;
2700  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2701  s->frames[i].tf.f = av_frame_alloc();
2702  if (!s->frames[i].tf.f)
2703  return AVERROR(ENOMEM);
2704  }
2705  return 0;
2706 }
2707 
2708 static av_always_inline
2709 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2710 {
2711  VP8Context *s = avctx->priv_data;
2712  int ret;
2713 
2714  s->avctx = avctx;
2715  s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2716  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2717  avctx->internal->allocate_progress = 1;
2718 
2719  ff_videodsp_init(&s->vdsp, 8);
2720 
2721  ff_vp78dsp_init(&s->vp8dsp);
2722  if (CONFIG_VP7_DECODER && is_vp7) {
2723  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2724  ff_vp7dsp_init(&s->vp8dsp);
2725  s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2726  s->filter_mb_row = vp7_filter_mb_row;
2727  } else if (CONFIG_VP8_DECODER && !is_vp7) {
2728  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2729  ff_vp8dsp_init(&s->vp8dsp);
2730  s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2731  s->filter_mb_row = vp8_filter_mb_row;
2732  }
2733 
2734  /* does not change for VP8 */
2735  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2736 
2737  if ((ret = vp8_init_frames(s)) < 0) {
2738  ff_vp8_decode_free(avctx);
2739  return ret;
2740  }
2741 
2742  return 0;
2743 }
2744 
2745 #if CONFIG_VP7_DECODER
2746 static int vp7_decode_init(AVCodecContext *avctx)
2747 {
2748  return vp78_decode_init(avctx, IS_VP7);
2749 }
2750 #endif /* CONFIG_VP7_DECODER */
2751 
2752 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2753 {
2754  return vp78_decode_init(avctx, IS_VP8);
2755 }
2756 
2757 #if CONFIG_VP8_DECODER
2758 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2759 {
2760  VP8Context *s = avctx->priv_data;
2761  int ret;
2762 
2763  s->avctx = avctx;
2764 
2765  if ((ret = vp8_init_frames(s)) < 0) {
2766  ff_vp8_decode_free(avctx);
2767  return ret;
2768  }
2769 
2770  return 0;
2771 }
2772 
2773 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
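/* REBASE maps a VP8Frame pointer from the source thread's context to the
 * frame at the same index in the destination context, so both contexts keep
 * equivalent reference lists after the copy below. */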
2774 
2775 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2776  const AVCodecContext *src)
2777 {
2778  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2779  int i;
2780 
2781  if (s->macroblocks_base &&
2782  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2783  free_buffers(s);
2784  s->mb_width = s_src->mb_width;
2785  s->mb_height = s_src->mb_height;
2786  }
2787 
2788  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2789  s->segmentation = s_src->segmentation;
2790  s->lf_delta = s_src->lf_delta;
2791  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2792 
2793  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2794  if (s_src->frames[i].tf.f->data[0]) {
2795  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2796  if (ret < 0)
2797  return ret;
2798  }
2799  }
2800 
2801  s->framep[0] = REBASE(s_src->next_framep[0]);
2802  s->framep[1] = REBASE(s_src->next_framep[1]);
2803  s->framep[2] = REBASE(s_src->next_framep[2]);
2804  s->framep[3] = REBASE(s_src->next_framep[3]);
2805 
2806  return 0;
2807 }
2808 #endif /* CONFIG_VP8_DECODER */
2809 
2810 #if CONFIG_VP7_DECODER
2811 AVCodec ff_vp7_decoder = {
2812  .name = "vp7",
2813  .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2814  .type = AVMEDIA_TYPE_VIDEO,
2815  .id = AV_CODEC_ID_VP7,
2816  .priv_data_size = sizeof(VP8Context),
2817  .init = vp7_decode_init,
2818  .close = ff_vp8_decode_free,
2819  .decode = vp7_decode_frame,
2820  .capabilities = CODEC_CAP_DR1,
2821  .flush = vp8_decode_flush,
2822 };
2823 #endif /* CONFIG_VP7_DECODER */
2824 
2825 #if CONFIG_VP8_DECODER
2826 AVCodec ff_vp8_decoder = {
2827  .name = "vp8",
2828  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2829  .type = AVMEDIA_TYPE_VIDEO,
2830  .id = AV_CODEC_ID_VP8,
2831  .priv_data_size = sizeof(VP8Context),
2832  .init = ff_vp8_decode_init,
2833  .close = ff_vp8_decode_free,
2834  .decode = ff_vp8_decode_frame,
2835  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2836  .flush = vp8_decode_flush,
2837  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2838  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2839 };
2840 #endif /* CONFIG_VP8_DECODER */