00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "libavutil/imgutils.h"
00027 #include "avcodec.h"
00028 #include "internal.h"
00029 #include "vp8.h"
00030 #include "vp8data.h"
00031 #include "rectangle.h"
00032 #include "thread.h"
00033
00034 #if ARCH_ARM
00035 # include "arm/vp8.h"
00036 #endif
00037
00038 static void free_buffers(VP8Context *s)
00039 {
00040 int i;
00041 if (s->thread_data)
00042 for (i = 0; i < MAX_THREADS; i++) {
00043 av_freep(&s->thread_data[i].filter_strength);
00044 av_freep(&s->thread_data[i].edge_emu_buffer);
00045 }
00046 av_freep(&s->thread_data);
00047 av_freep(&s->macroblocks_base);
00048 av_freep(&s->intra4x4_pred_mode_top);
00049 av_freep(&s->top_nnz);
00050 av_freep(&s->top_border);
00051
00052 s->macroblocks = NULL;
00053 }
00054
/**
 * Get a frame buffer and attach a per-macroblock segmentation map to it.
 *
 * A segmentation map previously queued by vp8_release_frame() is recycled
 * when available and still valid for the current dimensions; otherwise a
 * zeroed map of mb_width * mb_height bytes is allocated.
 *
 * @return 0 on success, a negative AVERROR on failure
 */
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
        return ret;
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        /* reuse a queued map instead of reallocating */
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
    } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
        /* allocation failed: give the buffer back before erroring out */
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }
    return 0;
}
00068
/**
 * Release a frame buffer and dispose of its segmentation map.
 *
 * @param prefer_delayed_free queue the map for later reuse / late freeing
 *                            instead of freeing it immediately
 * @param can_direct_free     immediate freeing is safe in this call context
 *                            (no other thread may still read the map)
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Queue the map for reuse by vp8_alloc_frame(). When direct
             * freeing is allowed we keep at most one cached map and free
             * any overflow right away; otherwise the full queue is used
             * so maps are never freed while possibly still referenced. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) {
                /* queue full but freeing is safe here */
                av_free(f->ref_index[0]);
            }
            f->ref_index[0] = NULL;
        } else {
            av_free(f->ref_index[0]);
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}
00090
/**
 * Release all reference frames and clear the frame pointer table;
 * optionally free all per-context buffers as well (size change / close).
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!avctx->internal->is_copy) {
        /* NOTE(review): is_copy contexts appear not to own the frames,
         * presumably why only the original context releases them — confirm */
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
                vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem) {
        free_buffers(s);
        /* queued segmentation maps were sized for the old dimensions */
        s->maps_are_invalid = 1;
    }
}
00109
/**
 * Codec flush callback: drop all reference frames (e.g. on seek) while
 * keeping the decoder's scratch buffers allocated.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}
00114
00115 static int update_dimensions(VP8Context *s, int width, int height)
00116 {
00117 AVCodecContext *avctx = s->avctx;
00118 int i;
00119
00120 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
00121 height != s->avctx->height) {
00122 if (av_image_check_size(width, height, 0, s->avctx))
00123 return AVERROR_INVALIDDATA;
00124
00125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
00126
00127 avcodec_set_dimensions(s->avctx, width, height);
00128 }
00129
00130 s->mb_width = (s->avctx->coded_width +15) / 16;
00131 s->mb_height = (s->avctx->coded_height+15) / 16;
00132
00133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
00134 if (!s->mb_layout) {
00135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
00137 }
00138 else
00139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
00140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
00143
00144 for (i = 0; i < MAX_THREADS; i++) {
00145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
00146 #if HAVE_THREADS
00147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
00148 pthread_cond_init(&s->thread_data[i].cond, NULL);
00149 #endif
00150 }
00151
00152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
00153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
00154 return AVERROR(ENOMEM);
00155
00156 s->macroblocks = s->macroblocks_base + 1;
00157
00158 return 0;
00159 }
00160
00161 static void parse_segment_info(VP8Context *s)
00162 {
00163 VP56RangeCoder *c = &s->c;
00164 int i;
00165
00166 s->segmentation.update_map = vp8_rac_get(c);
00167
00168 if (vp8_rac_get(c)) {
00169 s->segmentation.absolute_vals = vp8_rac_get(c);
00170
00171 for (i = 0; i < 4; i++)
00172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
00173
00174 for (i = 0; i < 4; i++)
00175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00176 }
00177 if (s->segmentation.update_map)
00178 for (i = 0; i < 3; i++)
00179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00180 }
00181
00182 static void update_lf_deltas(VP8Context *s)
00183 {
00184 VP56RangeCoder *c = &s->c;
00185 int i;
00186
00187 for (i = 0; i < 4; i++) {
00188 if (vp8_rac_get(c)) {
00189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
00190
00191 if (vp8_rac_get(c))
00192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
00193 }
00194 }
00195
00196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
00197 if (vp8_rac_get(c)) {
00198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
00199
00200 if (vp8_rac_get(c))
00201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
00202 }
00203 }
00204 }
00205
00206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00207 {
00208 const uint8_t *sizes = buf;
00209 int i;
00210
00211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00212
00213 buf += 3*(s->num_coeff_partitions-1);
00214 buf_size -= 3*(s->num_coeff_partitions-1);
00215 if (buf_size < 0)
00216 return -1;
00217
00218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
00219 int size = AV_RL24(sizes + 3*i);
00220 if (buf_size - size < 0)
00221 return -1;
00222
00223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00224 buf += size;
00225 buf_size -= size;
00226 }
00227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00228
00229 return 0;
00230 }
00231
/**
 * Compute the per-segment dequantization factors from the base AC
 * quantizer index and the five delta values in the frame header.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            /* per-segment values may be absolute or deltas from yac_qi */
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581 / 2^16 is approximately 1.55, the Y2 AC scale factor */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* clamp special cases */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
00264
00278 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00279 {
00280 VP56RangeCoder *c = &s->c;
00281
00282 if (update)
00283 return VP56_FRAME_CURRENT;
00284
00285 switch (vp8_rac_get_uint(c, 2)) {
00286 case 1:
00287 return VP56_FRAME_PREVIOUS;
00288 case 2:
00289 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00290 }
00291 return VP56_FRAME_NONE;
00292 }
00293
00294 static void update_refs(VP8Context *s)
00295 {
00296 VP56RangeCoder *c = &s->c;
00297
00298 int update_golden = vp8_rac_get(c);
00299 int update_altref = vp8_rac_get(c);
00300
00301 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00302 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00303 }
00304
/**
 * Parse the VP8 frame header: the uncompressed frame tag, the keyframe
 * prologue, and the first compressed ("header") partition, setting up all
 * per-frame decoder state (filters, quantizers, probabilities, partitions).
 *
 * @return 0 on success, a negative AVERROR on invalid input
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    /* uncompressed 3-byte frame tag */
    s->keyframe = !(buf[0] & 1);
    s->profile = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size = AV_RL24(buf) >> 5;
    buf += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    /* profile 0 uses the six-tap subpel filters, other profiles bilinear */
    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        /* keyframe prologue: start code, dimensions, scaling */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* keyframes reset all probability tables to their defaults */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); /* clamping-type bit, value unused here */
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0;

    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    /* reallocate buffers on the first frame or on any dimension change */
    if (!s->macroblocks_base ||
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    /* if this frame's probability updates should not persist, work on a
     * copy so the saved set stays untouched */
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* DCT token probability updates */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        /* one update fans out to all coeff positions in band j */
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        /* inter-frame mode and reference probabilities */
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

        /* motion vector probability updates */
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
00442
00443 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00444 {
00445 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00446 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00447 }
00448
/**
 * Decode one motion vector component.
 *
 * @param p 19-entry probability table for this component
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* large magnitude: bits 0..2, then bits 9..4, then bit 3 */
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3 is forced set when no higher bit is set, otherwise coded
         * explicitly -- NOTE(review): matches the 0xFFF0 mask test above */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        /* small magnitude (0..7): walk a small probability tree at p[2..] */
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x += 2*bit;
        x += vp56_rac_get_prob(c, *ps);
    }

    /* sign bit is only read for non-zero magnitudes */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
00479
00480 static av_always_inline
00481 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00482 {
00483 if (left == top)
00484 return vp8_submv_prob[4-!!left];
00485 if (!top)
00486 return vp8_submv_prob[2];
00487 return vp8_submv_prob[1-!!left];
00488 }
00489
/**
 * Decode the sub-block partitioning and the per-partition motion vectors
 * of a SPLITMV macroblock.
 *
 * @return the number of motion vectors parsed (depends on the split mode)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv = mb->bmv;

    /* locate the macroblock above; its position depends on the MB layout */
    if (!layout)
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    /* read the split mode from its 3-probability tree */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        /* gather the left and above sub-MVs as raw 32-bit words; blocks on
         * the MB edge take them from the neighboring macroblock */
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        /* NEW4X4 / ZERO4X4 / TOP4X4 / LEFT4X4 */
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    /* explicit new MV relative to the MB's best MV */
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
00564
/**
 * Decode the motion vector mode and motion vector(s) of an inter macroblock,
 * using the zero / nearest / near predictors built from the left, above and
 * above-left neighbors.
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
                                  mb - 1 /* left */,
                                  0 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    /* neighbor addressing differs between the two macroblock layouts */
    if (!layout) {
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* tally the neighbors' MVs into the near_mv candidates; edges 0 and 1
     * (top, left) weigh 2, edge 2 (top-left) weighs 1 */
#define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the two packed 16-bit components */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* if top and top-left agree, promote the nearest candidate */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* keep the candidates ordered by count */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* best MV prediction, then choose between SPLIT and NEW MV */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
00663
/**
 * Decode the 16 per-subblock intra prediction modes of an I4x4 macroblock.
 *
 * On keyframes each mode is context-coded from the modes above and to the
 * left; on inter frames a single fixed probability table is used.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        /* sliced-threading layout stores the top modes inside the MB above */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t* top;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* each decoded mode becomes context for the next row/column */
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
00697
/**
 * Decode a macroblock's segment, skip flag, prediction mode(s), reference
 * frame and (for inter MBs) its motion vectors.
 *
 * @param segment in/out: the MB's segment id (kept when the map isn't updated)
 * @param ref     optional previously decoded segment id to carry over
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map) {
        /* 2-bit segment id coded with a 3-probability tree */
        int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* replicate the 16x16 mode into the 4x4 context rows */
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB: pick the reference frame, then decode its MVs */
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        /* intra MB in an inter frame */
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
00753
#ifndef decode_block_coeffs_internal
/**
 * Decode and dequantize the DCT coefficients of one 4x4 block, starting
 * at coefficient index i (the first-token EOB check has already been done
 * by decode_block_coeffs()).
 *
 * Works on a local copy of the range coder for speed and writes it back
 * on exit.
 *
 * @param token_prob probabilities for the first token to decode
 * @param qmul       {DC, AC} dequantization multipliers
 * @return the index one past the last decoded coefficient
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) /* end of block */
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { /* DCT_0 */
            if (++i == 16)
                break; /* invalid input; blocks should end with EOB */
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { /* DCT_1 */
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { /* DCT 2,3,4 */
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                /* DCT_CAT* tokens */
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { /* DCT_CAT1 */
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { /* DCT_CAT2 */
                        coeff = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { /* DCT_CAT3 and up */
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        /* sign, dequantize (DC uses qmul[0], AC qmul[1]) and store in scan order */
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif
00819
00831 static av_always_inline
00832 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00833 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00834 int i, int zero_nhood, int16_t qmul[2])
00835 {
00836 uint8_t *token_prob = probs[i][zero_nhood];
00837 if (!vp56_rac_get_prob_branchy(c, token_prob[0]))
00838 return 0;
00839 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00840 }
00841
/**
 * Decode all residual coefficients of a macroblock: the optional luma DC
 * (Y2/WHT) block, the 16 luma 4x4 blocks and the 8 chroma 4x4 blocks,
 * updating the top/left non-zero context along the way.
 *
 * @param t_nnz top non-zero context (per column + Y2 slot at index 8)
 * @param l_nnz left non-zero context (per row + Y2 slot at index 8)
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        /* these modes carry a separate luma DC (Y2) block */
        nnz_pred = t_nnz[8] + l_nnz[8];

        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            /* inverse WHT distributes the DC values into the 16 luma blocks */
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* luma AC blocks then start at coefficient 1 with context 0 */
        luma_start = 1;
        luma_ctx = 0;
    }

    /* 16 luma 4x4 blocks */
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            /* +1 when a DC-only block still needs the idct (block_dc set) */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    /* 8 chroma 4x4 blocks (U at i=4, V at i=5) */
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    /* nothing to transform at all: treat the MB as skipped */
    if (!nnz_total)
        mb->skip = 1;
}
00902
00903 static av_always_inline
00904 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00905 int linesize, int uvlinesize, int simple)
00906 {
00907 AV_COPY128(top_border, src_y + 15*linesize);
00908 if (!simple) {
00909 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00910 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00911 }
00912 }
00913
/**
 * Exchange (or copy) the saved top border row with the pixels above the
 * current macroblock, so intra prediction sees the unfiltered samples.
 *
 * @param xchg non-zero to swap (before prediction), zero to copy back
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32; /* previous MB's border entry */
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do { \
        if (xchg) AV_SWAP64(b,a); \
        else AV_COPY64(b,a); \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border+8, src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    /* chroma borders: only needed for the normal filter, and always on
     * the first macroblock row */
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
00944
00945 static av_always_inline
00946 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00947 {
00948 if (!mb_x) {
00949 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00950 } else {
00951 return mb_y ? mode : LEFT_DC_PRED8x8;
00952 }
00953 }
00954
00955 static av_always_inline
00956 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00957 {
00958 if (!mb_x) {
00959 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00960 } else {
00961 return mb_y ? mode : HOR_PRED8x8;
00962 }
00963 }
00964
00965 static av_always_inline
00966 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00967 {
00968 if (mode == DC_PRED8x8) {
00969 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00970 } else {
00971 return mode;
00972 }
00973 }
00974
00975 static av_always_inline
00976 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00977 {
00978 switch (mode) {
00979 case DC_PRED8x8:
00980 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00981 case VERT_PRED8x8:
00982 return !mb_y ? DC_127_PRED8x8 : mode;
00983 case HOR_PRED8x8:
00984 return !mb_x ? DC_129_PRED8x8 : mode;
00985 case PLANE_PRED8x8 :
00986 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00987 }
00988 return mode;
00989 }
00990
00991 static av_always_inline
00992 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00993 {
00994 if (!mb_x) {
00995 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00996 } else {
00997 return mb_y ? mode : HOR_VP8_PRED;
00998 }
00999 }
01000
/**
 * Edge-adjust a 4x4 intra mode when no emulated edge pixels exist.
 *
 * @param copy_buf set to 1 when the caller must predict into a copy buffer
 *                 with hand-filled border samples instead of the frame
 */
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall through: first row still lacks top samples */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall through: first column still lacks left samples */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED:
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        /* modes needing both borders: use a copy buffer on any edge */
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
01034
01035 static av_always_inline
01036 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
01037 VP8Macroblock *mb, int mb_x, int mb_y)
01038 {
01039 AVCodecContext *avctx = s->avctx;
01040 int x, y, mode, nnz;
01041 uint32_t tr;
01042
01043
01044
01045 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
01046 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01047 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01048 s->filter.simple, 1);
01049
01050 if (mb->mode < MODE_I4x4) {
01051 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01052 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
01053 } else {
01054 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
01055 }
01056 s->hpc.pred16x16[mode](dst[0], s->linesize);
01057 } else {
01058 uint8_t *ptr = dst[0];
01059 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
01060 uint8_t tr_top[4] = { 127, 127, 127, 127 };
01061
01062
01063
01064 uint8_t *tr_right = ptr - s->linesize + 16;
01065
01066
01067
01068 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
01069 mb_x == s->mb_width-1) {
01070 tr = tr_right[-1]*0x01010101u;
01071 tr_right = (uint8_t *)&tr;
01072 }
01073
01074 if (mb->skip)
01075 AV_ZERO128(td->non_zero_count_cache);
01076
01077 for (y = 0; y < 4; y++) {
01078 uint8_t *topright = ptr + 4 - s->linesize;
01079 for (x = 0; x < 4; x++) {
01080 int copy = 0, linesize = s->linesize;
01081 uint8_t *dst = ptr+4*x;
01082 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
01083
01084 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
01085 topright = tr_top;
01086 } else if (x == 3)
01087 topright = tr_right;
01088
01089 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01090 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©);
01091 if (copy) {
01092 dst = copy_dst + 12;
01093 linesize = 8;
01094 if (!(mb_y + y)) {
01095 copy_dst[3] = 127U;
01096 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
01097 } else {
01098 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
01099 if (!(mb_x + x)) {
01100 copy_dst[3] = 129U;
01101 } else {
01102 copy_dst[3] = ptr[4*x-s->linesize-1];
01103 }
01104 }
01105 if (!(mb_x + x)) {
01106 copy_dst[11] =
01107 copy_dst[19] =
01108 copy_dst[27] =
01109 copy_dst[35] = 129U;
01110 } else {
01111 copy_dst[11] = ptr[4*x -1];
01112 copy_dst[19] = ptr[4*x+s->linesize -1];
01113 copy_dst[27] = ptr[4*x+s->linesize*2-1];
01114 copy_dst[35] = ptr[4*x+s->linesize*3-1];
01115 }
01116 }
01117 } else {
01118 mode = intra4x4[x];
01119 }
01120 s->hpc.pred4x4[mode](dst, topright, linesize);
01121 if (copy) {
01122 AV_COPY32(ptr+4*x , copy_dst+12);
01123 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
01124 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01125 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01126 }
01127
01128 nnz = td->non_zero_count_cache[y][x];
01129 if (nnz) {
01130 if (nnz == 1)
01131 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
01132 else
01133 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
01134 }
01135 topright += 4;
01136 }
01137
01138 ptr += 4*s->linesize;
01139 intra4x4 += 4;
01140 }
01141 }
01142
01143 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01144 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
01145 } else {
01146 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
01147 }
01148 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01149 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01150
01151 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
01152 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01153 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01154 s->filter.simple, 0);
01155 }
01156
/* Sub-pixel MC helper table, indexed by (mv & 7):
 * [0]: number of extra pixels needed to the left/top of the block
 * [1]: total number of extra pixels the interpolation filter requires
 *      (i.e. how much wider/taller the emu-edge copy must be)
 * [2]: number of extra pixels needed to the right/bottom of the block */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
01163
/**
 * Luma motion compensation for one block.
 *
 * @param s        VP8 decoding context
 * @param td       per-thread decoding data (supplies edge_emu_buffer)
 * @param dst      target buffer for block data at block position
 * @param ref      reference frame (data[0] is the plane origin)
 * @param mv       quarter-pel motion vector relative to the block position
 * @param x_off    horizontal position of block from plane origin
 * @param y_off    vertical position of block from plane origin
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (16, 8 or 4)
 * @param width    width of the plane
 * @param height   height of the plane
 * @param linesize stride of a single plane line, including padding
 * @param mc_func  MC function table, indexed by [my_idx][mx_idx]
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 AVFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->data[0];

    if (AV_RN32A(mv)) {
        /* luma mvs are in quarter-pel; <<1 converts to eighth-pel fraction */
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        /* edge emulation: wait until the reference rows we need (plus filter
         * margin) have been decoded, then bounce through the emu buffer if
         * the access would fall outside the padded plane */
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        /* zero mv: plain copy, no subpel filter margin needed */
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
01213
/**
 * Chroma motion compensation for one block; processes both U and V planes
 * (they share the same motion vector and geometry).
 *
 * @param s        VP8 decoding context
 * @param td       per-thread decoding data (supplies edge_emu_buffer)
 * @param dst1     target buffer for the U block
 * @param dst2     target buffer for the V block
 * @param ref      reference frame (data[1]/data[2] are the plane origins)
 * @param mv       eighth-pel motion vector relative to the block position
 * @param x_off    horizontal position of block from plane origin
 * @param y_off    vertical position of block from plane origin
 * @param block_w  width of block (8 or 4)
 * @param block_h  height of block (8 or 4)
 * @param width    width of the chroma plane
 * @param height   height of the chroma plane
 * @param linesize stride of a single chroma line, including padding
 * @param mc_func  MC function table, indexed by [my_idx][mx_idx]
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        /* edge emulation */
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* the single shared emu buffer is reused for both planes, so U
             * must be fully interpolated before V overwrites it */
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
01273
/**
 * Motion-compensate one luma partition and the corresponding chroma area.
 * The chroma mv is the luma mv reused; all offsets/sizes are halved for the
 * 4:2:0 chroma planes.  With profile 3 ("full-pel chroma") the fractional
 * chroma mv bits are masked off.
 */
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
01304
01305
01306
01307 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01308 {
01309
01310 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01311 int x_off = mb_x << 4, y_off = mb_y << 4;
01312 int mx = (mb->mv.x>>2) + x_off + 8;
01313 int my = (mb->mv.y>>2) + y_off;
01314 uint8_t **src= s->framep[ref]->data;
01315 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01316
01317
01318
01319 s->dsp.prefetch(src[0]+off, s->linesize, 4);
01320 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01321 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01322 }
01323 }
01324
/**
 * Apply motion vectors to the prediction buffer (RFC 6386 chapter 18).
 * Dispatches on the macroblock's partitioning: whole-MB, 16x8, 8x16, 8x8
 * quadrants, or sixteen 4x4 sub-blocks with per-sub-block mvs.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V: each 4x4 chroma block uses the rounded average of the four
         * covering luma mvs (the shift by INT_BIT-1 adds the sign bit so the
         * >>2 rounds toward zero, as the spec requires) */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
01408
/* Add the inverse-transformed residual of an inter (non-I4x4) macroblock to
 * the prediction.  nnz4 packs four per-sub-block non-zero counts into one
 * 32-bit word; the &~0x01010101 test detects whether any sub-block has more
 * than a DC coefficient, enabling the fast dc_add4 paths. */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    /* luma: I4x4 blocks already had their residual added in intra_predict() */
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    /* mixed: per-block DC-only vs. full idct */
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    /* all four blocks are DC-only (or empty) */
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
01461
01462 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01463 {
01464 int interior_limit, filter_level;
01465
01466 if (s->segmentation.enabled) {
01467 filter_level = s->segmentation.filter_level[mb->segment];
01468 if (!s->segmentation.absolute_vals)
01469 filter_level += s->filter.level;
01470 } else
01471 filter_level = s->filter.level;
01472
01473 if (s->lf_delta.enabled) {
01474 filter_level += s->lf_delta.ref[mb->ref_frame];
01475 filter_level += s->lf_delta.mode[mb->mode];
01476 }
01477
01478 filter_level = av_clip_uintp2(filter_level, 6);
01479
01480 interior_limit = filter_level;
01481 if (s->filter.sharpness) {
01482 interior_limit >>= (s->filter.sharpness + 3) >> 2;
01483 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01484 }
01485 interior_limit = FFMAX(interior_limit, 1);
01486
01487 f->filter_level = filter_level;
01488 f->inner_limit = interior_limit;
01489 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01490 }
01491
/* Run the normal (non-simple) loop filter on one macroblock.  Edge order is
 * normative: left MB edge, inner vertical edges, top MB edge, inner
 * horizontal edges — later filters read pixels written by earlier ones. */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    /* left macroblock edge (skipped for the first MB column) */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    /* inner vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma) */
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    /* top macroblock edge (skipped for the first MB row) */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    /* inner horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma) */
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
01561
01562 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01563 {
01564 int mbedge_lim, bedge_lim;
01565 int filter_level = f->filter_level;
01566 int inner_limit = f->inner_limit;
01567 int inner_filter = f->inner_filter;
01568 int linesize = s->linesize;
01569
01570 if (!filter_level)
01571 return;
01572
01573 bedge_lim = 2*filter_level + inner_limit;
01574 mbedge_lim = bedge_lim + 4;
01575
01576 if (mb_x)
01577 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01578 if (inner_filter) {
01579 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01580 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01581 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01582 }
01583
01584 if (mb_y)
01585 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01586 if (inner_filter) {
01587 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01588 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01589 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01590 }
01591 }
01592
01593 static void release_queued_segmaps(VP8Context *s, int is_close)
01594 {
01595 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
01596 while (s->num_maps_to_be_freed > leave_behind)
01597 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
01598 s->maps_are_invalid = 0;
01599 }
01600
/* mv clamping margin around the frame, in 1/8-pel units (16 pixels) */
#define MARGIN (16 << 2)
/* Decode all macroblock modes and motion vectors for a frame up front.
 * Used with mb_layout == 1 (frame threading), where mode/mv parsing must
 * finish before slice decoding starts. */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* +1 row/column of border macroblocks in the layout-1 arrangement */
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        /* reset left intra-mode context to DC at the start of each row */
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                /* top row predicts from an implicit DC border */
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
01630
#if HAVE_THREADS
/*
 * Block until the other slice thread (otd) has decoded at least up to
 * macroblock (mb_x_check, mb_y_check).  Positions are packed as
 * (mb_y << 16) | mb_x so a single integer comparison orders them.
 * wait_mb_pos advertises what we are waiting for so update_pos() below
 * knows when a broadcast is needed.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

/*
 * Publish this thread's decode position and wake neighbouring threads that
 * are blocked waiting for it to pass their wait_mb_pos.  Only signals under
 * sliced threading with more than one job; expects avctx, num_jobs, prev_td
 * and next_td to be in scope at the expansion site.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != td && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != td && pos >= prev_td->wait_mb_pos);\
    td->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&td->lock);\
        pthread_cond_broadcast(&td->cond);\
        pthread_mutex_unlock(&td->lock);\
    }\
    } while(0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
01667
/* Decode one macroblock row (mode/mv parse for layout 0, coefficients,
 * prediction and idct) without loop filtering.  The thread position
 * macros coordinate with the threads decoding the rows above and below. */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    /* token partitions are striped across rows */
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        /* layout 0 keeps only two rows of macroblock context, reused */
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb));   // zero "left" macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    /* left edge of 129 for intra prediction */
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        /* Wait for the thread decoding the row above to be far enough ahead;
         * thread 0 must additionally wait past the filter pass offset */
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            /* Reset DC block predictors if they would exist if the mb had
             * coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]      = 0;
                s->top_nnz[mb_x][8]  = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            /* the last slice thread backs up the border the first thread's
             * filter pass will need next frame row */
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        /* NOTE(review): mb_x never equals s->mb_width+1 inside this loop
         * (the loop bound is mb_x < s->mb_width); this branch looks
         * vestigial — confirm before removing */
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
01777
/* Loop-filter one macroblock row, using the per-MB strengths computed by
 * vp8_decode_mb_row_no_filter().  Synchronizes with the rows above and
 * below, since filtering reads/writes pixels across row boundaries. */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        /* wait until the row above has been filtered past us... */
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        /* ...and the row below has been decoded past us (its pixels are
         * touched by our bottom-edge filtering) */
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        if (num_jobs == 1) {
            /* single-threaded: back up the pre-filter border here */
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        /* filter positions are offset by mb_width+3 past decode positions */
        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
01831
/* Per-job entry point for avctx->execute2(): each job decodes (and, if
 * enabled, filters) every num_jobs-th macroblock row, reporting progress
 * for frame threading as rows complete. */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    AVFrame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;  /* redundant with the loop bound */
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        /* mark the whole row done so waiters on any mb_x proceed */
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(curframe, mb_y, 0);
    }

    return 0;
}
01858
/* Decode one VP8 frame: parse the header, pick buffers, run the slice
 * jobs, and rotate the last/golden/altref reference frame pointers. */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame;

    release_queued_segmaps(s, 0);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    /* how aggressively this frame may be skipped: non-referenced frames are
     * cheapest to drop, then non-keyframes, then everything */
    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i], 1, 0);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        /* with 5 frames and at most 4 live references this cannot happen
         * unless the bookkeeping above is broken */
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        vp8_release_frame(s, curframe, 1, 0);

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = vp8_alloc_frame(s, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside
     * the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(prev_frame, 1, 0);

    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(curframe, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
02016
02017 static av_cold int vp8_decode_init(AVCodecContext *avctx)
02018 {
02019 VP8Context *s = avctx->priv_data;
02020
02021 s->avctx = avctx;
02022 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
02023
02024 ff_dsputil_init(&s->dsp, avctx);
02025 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
02026 ff_vp8dsp_init(&s->vp8dsp);
02027
02028 return 0;
02029 }
02030
/* Decoder close: release every frame buffer and all queued segmentation
 * maps (is_close=1 frees even the map normally kept for reuse). */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0, 1, 1);
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}
02037
/* Frame-threading worker init: only the context back-pointer is needed;
 * everything else is copied in vp8_decode_update_thread_context(). */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;

    return 0;
}
02046
/* Translate a frame pointer from the source thread's context into the
 * corresponding slot of this context's frames[] array (NULL stays NULL). */
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
02049
/* Copy the decoding state a future frame depends on from the previous
 * frame-thread context: probabilities, segmentation/loop-filter settings,
 * sign biases, and the (rebased) reference frame pointers. */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        /* resolution changed: drop our per-size buffers; queued segmentation
         * maps are the wrong size now, so invalidate them too */
        free_buffers(s);
        s->maps_are_invalid = 1;
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
02075
/* Public codec descriptor for the VP8 decoder. */
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};