44 int filter_height_down = (raw_my & 3) ? 3 : 0;
45 int full_my = (raw_my >> 2) + y_offset;
46 int bottom = full_my + filter_height_down +
height;
50 return FFMAX(0, bottom);
54 int16_t refs[2][48],
int n,
55 int height,
int y_offset,
int list0,
56 int list1,
int *nrefs)
69 if (
ref->parent->tf.progress !=
h->cur_pic.tf.progress ||
70 (
ref->reference & 3) !=
h->picture_structure) {
72 if (refs[0][ref_n] < 0)
74 refs[0][ref_n] =
FFMAX(refs[0][ref_n], my);
82 if (
ref->parent->tf.progress !=
h->cur_pic.tf.progress ||
83 (
ref->reference & 3) !=
h->picture_structure) {
85 if (refs[1][ref_n] < 0)
87 refs[1][ref_n] =
FFMAX(refs[1][ref_n], my);
99 const int mb_xy = sl->
mb_xy;
100 const int mb_type =
h->cur_pic.mb_type[mb_xy];
102 int nrefs[2] = { 0 };
105 memset(refs, -1,
sizeof(refs));
125 for (
i = 0;
i < 4;
i++) {
128 int y_offset = (
i & 2) << 2;
132 IS_DIR(sub_mb_type, 0, 0),
133 IS_DIR(sub_mb_type, 0, 1),
137 IS_DIR(sub_mb_type, 0, 0),
138 IS_DIR(sub_mb_type, 0, 1),
141 IS_DIR(sub_mb_type, 0, 0),
142 IS_DIR(sub_mb_type, 0, 1),
146 IS_DIR(sub_mb_type, 0, 0),
147 IS_DIR(sub_mb_type, 0, 1),
150 IS_DIR(sub_mb_type, 0, 0),
151 IS_DIR(sub_mb_type, 0, 1),
156 for (j = 0; j < 4; j++) {
157 int sub_y_offset = y_offset + 2 * (j & 2);
159 IS_DIR(sub_mb_type, 0, 0),
160 IS_DIR(sub_mb_type, 0, 1),
174 int pic_height = 16 *
h->mb_height >> ref_field_picture;
182 FFMIN((row >> 1) - !(row & 1),
186 FFMIN((row >> 1), pic_height - 1),
190 FFMIN(row * 2 + ref_field,
195 FFMIN(row, pic_height - 1),
199 FFMIN(row, pic_height - 1),
210 uint8_t *dest_y, uint8_t *dest_cb,
212 int src_x_offset,
int src_y_offset,
215 int pixel_shift,
int chroma_idc)
219 const int luma_xy = (mx & 3) + ((my & 3) << 2);
222 uint8_t *src_cb, *src_cr;
224 int extra_height = 0;
226 const int full_mx = mx >> 2;
227 const int full_my = my >> 2;
228 const int pic_width = 16 *
h->mb_width;
229 const int pic_height = 16 *
h->mb_height >>
MB_FIELD(sl);
237 if (full_mx < 0 - extra_width ||
238 full_my < 0 - extra_height ||
239 full_mx + 16 > pic_width + extra_width ||
240 full_my + 16 > pic_height + extra_height) {
244 16 + 5, 16 + 5 , full_mx - 2,
245 full_my - 2, pic_width, pic_height);
257 if (chroma_idc == 3 ) {
264 full_mx - 2, full_my - 2,
265 pic_width, pic_height);
268 qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize);
278 full_mx - 2, full_my - 2,
279 pic_width, pic_height);
282 qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize);
288 ysh = 3 - (chroma_idc == 2 );
289 if (chroma_idc == 1 &&
MB_FIELD(sl)) {
292 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
295 src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
297 src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
303 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
304 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
308 height >> (chroma_idc == 1 ),
309 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
314 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
315 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
319 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
325 uint8_t *dest_y, uint8_t *dest_cb,
327 int x_offset,
int y_offset,
332 int list0,
int list1,
333 int pixel_shift,
int chroma_idc)
338 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
339 if (chroma_idc == 3 ) {
340 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
341 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
342 }
else if (chroma_idc == 2 ) {
343 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
344 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
346 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
347 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
349 x_offset += 8 * sl->
mb_x;
355 dest_y, dest_cb, dest_cr, x_offset, y_offset,
356 qpix_op, chroma_op, pixel_shift, chroma_idc);
359 chroma_op = chroma_avg;
365 dest_y, dest_cb, dest_cr, x_offset, y_offset,
366 qpix_op, chroma_op, pixel_shift, chroma_idc);
373 uint8_t *dest_y, uint8_t *dest_cb,
375 int x_offset,
int y_offset,
382 int list0,
int list1,
383 int pixel_shift,
int chroma_idc)
387 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
388 if (chroma_idc == 3 ) {
390 chroma_weight_avg = luma_weight_avg;
391 chroma_weight_op = luma_weight_op;
392 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
393 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
394 }
else if (chroma_idc == 2 ) {
396 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
397 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
399 chroma_height =
height >> 1;
400 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
401 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
403 x_offset += 8 * sl->
mb_x;
406 if (list0 && list1) {
416 dest_y, dest_cb, dest_cr,
417 x_offset, y_offset, qpix_put, chroma_put,
418 pixel_shift, chroma_idc);
420 tmp_y, tmp_cb, tmp_cr,
421 x_offset, y_offset, qpix_put, chroma_put,
422 pixel_shift, chroma_idc);
426 int weight1 = 64 - weight0;
428 height, 5, weight0, weight1, 0);
431 chroma_height, 5, weight0, weight1, 0);
433 chroma_height, 5, weight0, weight1, 0);
443 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
449 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
458 int list = list1 ? 1 : 0;
462 dest_y, dest_cb, dest_cr, x_offset, y_offset,
463 qpix_put, chroma_put, pixel_shift, chroma_idc);
485 int list,
int pixel_shift,
495 int off = mx * (1<< pixel_shift) +
499 if (chroma_idc == 3 ) {
503 off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
504 h->vdsp.prefetch(
src[1] + off,
src[2] -
src[1], 2);
511 uint8_t *src_cb, uint8_t *src_cr,
512 int linesize,
int uvlinesize,
513 int xchg,
int chroma444,
514 int simple,
int pixel_shift)
519 uint8_t *top_border_m1;
532 deblock_topleft =
h->slice_table[sl->
mb_xy - 1 -
h->mb_stride] == sl->
slice_num;
535 deblock_topleft = (sl->
mb_x > 0);
539 src_y -= linesize + 1 + pixel_shift;
540 src_cb -= uvlinesize + 1 + pixel_shift;
541 src_cr -= uvlinesize + 1 + pixel_shift;
546 #define XCHG(a, b, xchg) \
549 AV_SWAP64(b + 0, a + 0); \
550 AV_SWAP64(b + 8, a + 8); \
560 if (deblock_topleft) {
561 XCHG(top_border_m1 + (8 << pixel_shift),
562 src_y - (7 << pixel_shift), 1);
564 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
565 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
566 if (sl->
mb_x + 1 <
h->mb_width) {
568 src_y + (17 << pixel_shift), 1);
572 if (deblock_topleft) {
573 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
574 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
576 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
577 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
578 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
579 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
580 if (sl->
mb_x + 1 <
h->mb_width) {
581 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
582 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
585 if (deblock_topleft) {
586 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
587 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
589 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
590 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
599 if (high_bit_depth) {
608 if (high_bit_depth) {
616 int mb_type,
int simple,
617 int transform_bypass,
619 const int *block_offset,
621 uint8_t *dest_y,
int p)
624 void (*idct_dc_add)(uint8_t *dst, int16_t *
block,
int stride);
627 block_offset += 16 * p;
630 if (transform_bypass) {
632 idct_add =
h->h264dsp.h264_add_pixels8_clear;
634 idct_dc_add =
h->h264dsp.h264_idct8_dc_add;
637 for (
i = 0;
i < 16;
i += 4) {
638 uint8_t *
const ptr = dest_y + block_offset[
i];
640 if (transform_bypass &&
h->ps.sps->profile_idc == 244 && dir <= 1) {
641 if (
h->x264_build < 151
U) {
642 h->hpc.pred8x8l_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
644 h->hpc.pred8x8l_filter_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift),
645 (sl-> topleft_samples_available <<
i) & 0x8000,
652 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift,
i * 16 + p * 256))
653 idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
655 idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
660 if (transform_bypass) {
662 idct_add =
h->h264dsp.h264_add_pixels4_clear;
664 idct_dc_add =
h->h264dsp.h264_idct_dc_add;
667 for (
i = 0;
i < 16;
i++) {
668 uint8_t *
const ptr = dest_y + block_offset[
i];
671 if (transform_bypass &&
h->ps.sps->profile_idc == 244 && dir <= 1) {
672 h->hpc.pred4x4_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
680 if (!topright_avail) {
682 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
683 topright = (uint8_t *)&tr_high;
685 tr = ptr[3 - linesize] * 0x01010101
u;
686 topright = (uint8_t *)&tr;
689 topright = ptr + (4 << pixel_shift) - linesize;
693 h->hpc.pred4x4[dir](ptr, topright, linesize);
696 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift,
i * 16 + p * 256))
697 idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
699 idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
707 if (!transform_bypass)
708 h->h264dsp.h264_luma_dc_dequant_idct(sl->
mb + (p * 256 << pixel_shift),
710 h->ps.pps->dequant4_coeff[p][qscale][0]);
712 static const uint8_t dc_mapping[16] = {
713 0 * 16, 1 * 16, 4 * 16, 5 * 16,
714 2 * 16, 3 * 16, 6 * 16, 7 * 16,
715 8 * 16, 9 * 16, 12 * 16, 13 * 16,
716 10 * 16, 11 * 16, 14 * 16, 15 * 16
718 for (
i = 0;
i < 16;
i++)
720 pixel_shift, dc_mapping[
i],
729 int mb_type,
int simple,
730 int transform_bypass,
732 const int *block_offset,
734 uint8_t *dest_y,
int p)
738 block_offset += 16 * p;
741 if (transform_bypass) {
742 if (
h->ps.sps->profile_idc == 244 &&
746 sl->
mb + (p * 256 << pixel_shift),
749 for (
i = 0;
i < 16;
i++)
752 h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[
i],
753 sl->
mb + (
i * 16 + p * 256 << pixel_shift),
757 h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
758 sl->
mb + (p * 256 << pixel_shift),
762 }
else if (sl->
cbp & 15) {
763 if (transform_bypass) {
764 const int di =
IS_8x8DCT(mb_type) ? 4 : 1;
766 :
h->h264dsp.h264_add_pixels4_clear;
767 for (
i = 0;
i < 16;
i += di)
770 sl->
mb + (
i * 16 + p * 256 << pixel_shift),
774 h->h264dsp.h264_idct8_add4(dest_y, block_offset,
775 sl->
mb + (p * 256 << pixel_shift),
779 h->h264dsp.h264_idct_add16(dest_y, block_offset,
780 sl->
mb + (p * 256 << pixel_shift),
802 const int mb_xy = sl->
mb_xy;
803 const int mb_type =
h->cur_pic.mb_type[mb_xy];
804 int is_complex = CONFIG_SMALL || sl->
is_complex ||
808 if (is_complex ||
h->pixel_shift)
809 hl_decode_mb_444_complex(
h, sl);
811 hl_decode_mb_444_simple_8(
h, sl);
812 }
else if (is_complex) {
813 hl_decode_mb_complex(
h, sl);
814 }
else if (
h->pixel_shift) {
815 hl_decode_mb_simple_16(
h, sl);
817 hl_decode_mb_simple_8(
h, sl);