00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #undef FUNC
00023 #undef PIXEL_SHIFT
00024
00025 #if SIMPLE
00026 # define FUNC(n) AV_JOIN(n ## _simple_, BITS)
00027 # define PIXEL_SHIFT (BITS >> 4)
00028 #else
00029 # define FUNC(n) n ## _complex
00030 # define PIXEL_SHIFT h->pixel_shift
00031 #endif
00032
00033 #undef CHROMA_IDC
00034 #define CHROMA_IDC 1
00035 #include "h264_mc_template.c"
00036
00037 #undef CHROMA_IDC
00038 #define CHROMA_IDC 2
00039 #include "h264_mc_template.c"
00040
00041 static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
00042 {
00043 MpegEncContext *const s = &h->s;
00044 const int mb_x = s->mb_x;
00045 const int mb_y = s->mb_y;
00046 const int mb_xy = h->mb_xy;
00047 const int mb_type = s->current_picture.f.mb_type[mb_xy];
00048 uint8_t *dest_y, *dest_cb, *dest_cr;
00049 int linesize, uvlinesize ;
00050 int i, j;
00051 int *block_offset = &h->block_offset[0];
00052 const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
00053
00054 const int is_h264 = !CONFIG_SVQ3_DECODER || SIMPLE || s->codec_id == AV_CODEC_ID_H264;
00055 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
00056 const int block_h = 16 >> s->chroma_y_shift;
00057 const int chroma422 = CHROMA422;
00058
00059 dest_y = s->current_picture.f.data[0] + ((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16;
00060 dest_cb = s->current_picture.f.data[1] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
00061 dest_cr = s->current_picture.f.data[2] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
00062
00063 s->dsp.prefetch(dest_y + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT), s->linesize, 4);
00064 s->dsp.prefetch(dest_cb + (s->mb_x & 7) * s->uvlinesize + (64 << PIXEL_SHIFT), dest_cr - dest_cb, 2);
00065
00066 h->list_counts[mb_xy] = h->list_count;
00067
00068 if (!SIMPLE && MB_FIELD) {
00069 linesize = h->mb_linesize = s->linesize * 2;
00070 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
00071 block_offset = &h->block_offset[48];
00072 if (mb_y & 1) {
00073 dest_y -= s->linesize * 15;
00074 dest_cb -= s->uvlinesize * (block_h - 1);
00075 dest_cr -= s->uvlinesize * (block_h - 1);
00076 }
00077 if (FRAME_MBAFF) {
00078 int list;
00079 for (list = 0; list < h->list_count; list++) {
00080 if (!USES_LIST(mb_type, list))
00081 continue;
00082 if (IS_16X16(mb_type)) {
00083 int8_t *ref = &h->ref_cache[list][scan8[0]];
00084 fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
00085 } else {
00086 for (i = 0; i < 16; i += 4) {
00087 int ref = h->ref_cache[list][scan8[i]];
00088 if (ref >= 0)
00089 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
00090 8, (16 + ref) ^ (s->mb_y & 1), 1);
00091 }
00092 }
00093 }
00094 }
00095 } else {
00096 linesize = h->mb_linesize = s->linesize;
00097 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
00098
00099 }
00100
00101 if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
00102 const int bit_depth = h->sps.bit_depth_luma;
00103 if (PIXEL_SHIFT) {
00104 int j;
00105 GetBitContext gb;
00106 init_get_bits(&gb, (uint8_t *)h->mb,
00107 ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
00108
00109 for (i = 0; i < 16; i++) {
00110 uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
00111 for (j = 0; j < 16; j++)
00112 tmp_y[j] = get_bits(&gb, bit_depth);
00113 }
00114 if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00115 if (!h->sps.chroma_format_idc) {
00116 for (i = 0; i < block_h; i++) {
00117 uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
00118 uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
00119 for (j = 0; j < 8; j++) {
00120 tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
00121 }
00122 }
00123 } else {
00124 for (i = 0; i < block_h; i++) {
00125 uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
00126 for (j = 0; j < 8; j++)
00127 tmp_cb[j] = get_bits(&gb, bit_depth);
00128 }
00129 for (i = 0; i < block_h; i++) {
00130 uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
00131 for (j = 0; j < 8; j++)
00132 tmp_cr[j] = get_bits(&gb, bit_depth);
00133 }
00134 }
00135 }
00136 } else {
00137 for (i = 0; i < 16; i++)
00138 memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
00139 if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00140 if (!h->sps.chroma_format_idc) {
00141 for (i = 0; i < 8; i++) {
00142 memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
00143 memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
00144 }
00145 } else {
00146 uint8_t *src_cb = (uint8_t *)h->mb + 256;
00147 uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
00148 for (i = 0; i < block_h; i++) {
00149 memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
00150 memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
00151 }
00152 }
00153 }
00154 }
00155 } else {
00156 if (IS_INTRA(mb_type)) {
00157 if (h->deblocking_filter)
00158 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
00159 uvlinesize, 1, 0, SIMPLE, PIXEL_SHIFT);
00160
00161 if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00162 if (CHROMA) {
00163 h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
00164 h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
00165 }
00166 }
00167
00168 hl_decode_mb_predict_luma(h, mb_type, is_h264, SIMPLE,
00169 transform_bypass, PIXEL_SHIFT,
00170 block_offset, linesize, dest_y, 0);
00171
00172 if (h->deblocking_filter)
00173 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
00174 uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT);
00175 } else if (is_h264) {
00176 if (chroma422) {
00177 FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
00178 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00179 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00180 h->h264dsp.weight_h264_pixels_tab,
00181 h->h264dsp.biweight_h264_pixels_tab);
00182 } else {
00183 FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
00184 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00185 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00186 h->h264dsp.weight_h264_pixels_tab,
00187 h->h264dsp.biweight_h264_pixels_tab);
00188 }
00189 }
00190
00191 hl_decode_mb_idct_luma(h, mb_type, is_h264, SIMPLE, transform_bypass,
00192 PIXEL_SHIFT, block_offset, linesize, dest_y, 0);
00193
00194 if ((SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) &&
00195 (h->cbp & 0x30)) {
00196 uint8_t *dest[2] = { dest_cb, dest_cr };
00197 if (transform_bypass) {
00198 if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
00199 (h->chroma_pred_mode == VERT_PRED8x8 ||
00200 h->chroma_pred_mode == HOR_PRED8x8)) {
00201 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0],
00202 block_offset + 16,
00203 h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
00204 uvlinesize);
00205 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1],
00206 block_offset + 32,
00207 h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
00208 uvlinesize);
00209 } else {
00210 idct_add = s->dsp.add_pixels4;
00211 for (j = 1; j < 3; j++) {
00212 for (i = j * 16; i < j * 16 + 4; i++)
00213 if (h->non_zero_count_cache[scan8[i]] ||
00214 dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
00215 idct_add(dest[j - 1] + block_offset[i],
00216 h->mb + (i * 16 << PIXEL_SHIFT),
00217 uvlinesize);
00218 if (chroma422) {
00219 for (i = j * 16 + 4; i < j * 16 + 8; i++)
00220 if (h->non_zero_count_cache[scan8[i + 4]] ||
00221 dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
00222 idct_add(dest[j - 1] + block_offset[i + 4],
00223 h->mb + (i * 16 << PIXEL_SHIFT),
00224 uvlinesize);
00225 }
00226 }
00227 }
00228 } else {
00229 if (is_h264) {
00230 int qp[2];
00231 if (chroma422) {
00232 qp[0] = h->chroma_qp[0] + 3;
00233 qp[1] = h->chroma_qp[1] + 3;
00234 } else {
00235 qp[0] = h->chroma_qp[0];
00236 qp[1] = h->chroma_qp[1];
00237 }
00238 if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
00239 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
00240 h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
00241 if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
00242 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
00243 h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
00244 h->h264dsp.h264_idct_add8(dest, block_offset,
00245 h->mb, uvlinesize,
00246 h->non_zero_count_cache);
00247 } else if (CONFIG_SVQ3_DECODER) {
00248 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
00249 h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
00250 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
00251 h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
00252 for (j = 1; j < 3; j++) {
00253 for (i = j * 16; i < j * 16 + 4; i++)
00254 if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
00255 uint8_t *const ptr = dest[j - 1] + block_offset[i];
00256 ff_svq3_add_idct_c(ptr, h->mb + i * 16,
00257 uvlinesize,
00258 ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
00259 }
00260 }
00261 }
00262 }
00263 }
00264 }
00265 if (h->cbp || IS_INTRA(mb_type)) {
00266 s->dsp.clear_blocks(h->mb);
00267 s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
00268 }
00269 }
00270
00271 #if !SIMPLE || BITS == 8
00272
00273 #undef CHROMA_IDC
00274 #define CHROMA_IDC 3
00275 #include "h264_mc_template.c"
00276
00277 static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
00278 {
00279 MpegEncContext *const s = &h->s;
00280 const int mb_x = s->mb_x;
00281 const int mb_y = s->mb_y;
00282 const int mb_xy = h->mb_xy;
00283 const int mb_type = s->current_picture.f.mb_type[mb_xy];
00284 uint8_t *dest[3];
00285 int linesize;
00286 int i, j, p;
00287 int *block_offset = &h->block_offset[0];
00288 const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
00289 const int plane_count = (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;
00290
00291 for (p = 0; p < plane_count; p++) {
00292 dest[p] = s->current_picture.f.data[p] +
00293 ((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16;
00294 s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT),
00295 s->linesize, 4);
00296 }
00297
00298 h->list_counts[mb_xy] = h->list_count;
00299
00300 if (!SIMPLE && MB_FIELD) {
00301 linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
00302 block_offset = &h->block_offset[48];
00303 if (mb_y & 1)
00304 for (p = 0; p < 3; p++)
00305 dest[p] -= s->linesize * 15;
00306 if (FRAME_MBAFF) {
00307 int list;
00308 for (list = 0; list < h->list_count; list++) {
00309 if (!USES_LIST(mb_type, list))
00310 continue;
00311 if (IS_16X16(mb_type)) {
00312 int8_t *ref = &h->ref_cache[list][scan8[0]];
00313 fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
00314 } else {
00315 for (i = 0; i < 16; i += 4) {
00316 int ref = h->ref_cache[list][scan8[i]];
00317 if (ref >= 0)
00318 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
00319 8, (16 + ref) ^ (s->mb_y & 1), 1);
00320 }
00321 }
00322 }
00323 }
00324 } else {
00325 linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
00326 }
00327
00328 if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
00329 if (PIXEL_SHIFT) {
00330 const int bit_depth = h->sps.bit_depth_luma;
00331 GetBitContext gb;
00332 init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);
00333
00334 for (p = 0; p < plane_count; p++)
00335 for (i = 0; i < 16; i++) {
00336 uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
00337 for (j = 0; j < 16; j++)
00338 tmp[j] = get_bits(&gb, bit_depth);
00339 }
00340 } else {
00341 for (p = 0; p < plane_count; p++)
00342 for (i = 0; i < 16; i++)
00343 memcpy(dest[p] + i * linesize,
00344 (uint8_t *)h->mb + p * 256 + i * 16, 16);
00345 }
00346 } else {
00347 if (IS_INTRA(mb_type)) {
00348 if (h->deblocking_filter)
00349 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
00350 linesize, 1, 1, SIMPLE, PIXEL_SHIFT);
00351
00352 for (p = 0; p < plane_count; p++)
00353 hl_decode_mb_predict_luma(h, mb_type, 1, SIMPLE,
00354 transform_bypass, PIXEL_SHIFT,
00355 block_offset, linesize, dest[p], p);
00356
00357 if (h->deblocking_filter)
00358 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
00359 linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
00360 } else {
00361 FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
00362 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00363 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00364 h->h264dsp.weight_h264_pixels_tab,
00365 h->h264dsp.biweight_h264_pixels_tab);
00366 }
00367
00368 for (p = 0; p < plane_count; p++)
00369 hl_decode_mb_idct_luma(h, mb_type, 1, SIMPLE, transform_bypass,
00370 PIXEL_SHIFT, block_offset, linesize,
00371 dest[p], p);
00372 }
00373 if (h->cbp || IS_INTRA(mb_type)) {
00374 s->dsp.clear_blocks(h->mb);
00375 s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
00376 }
00377 }
00378
00379 #endif