43 int filter_height_down = (raw_my & 3) ? 3 : 0;
44 int full_my = (raw_my >> 2) + y_offset;
45 int bottom = full_my + filter_height_down +
height;
49 return FFMAX(0, bottom);
53 int16_t refs[2][48],
int n,
54 int height,
int y_offset,
int list0,
55 int list1,
int *nrefs)
71 if (refs[0][ref_n] < 0)
73 refs[0][ref_n] =
FFMAX(refs[0][ref_n], my);
84 if (refs[1][ref_n] < 0)
86 refs[1][ref_n] =
FFMAX(refs[1][ref_n], my);
98 const int mb_xy = sl->
mb_xy;
101 int nrefs[2] = { 0 };
104 memset(refs, -1,
sizeof(refs));
124 for (i = 0; i < 4; i++) {
127 int y_offset = (i & 2) << 2;
131 IS_DIR(sub_mb_type, 0, 0),
132 IS_DIR(sub_mb_type, 0, 1),
136 IS_DIR(sub_mb_type, 0, 0),
137 IS_DIR(sub_mb_type, 0, 1),
140 IS_DIR(sub_mb_type, 0, 0),
141 IS_DIR(sub_mb_type, 0, 1),
145 IS_DIR(sub_mb_type, 0, 0),
146 IS_DIR(sub_mb_type, 0, 1),
149 IS_DIR(sub_mb_type, 0, 0),
150 IS_DIR(sub_mb_type, 0, 1),
155 for (j = 0; j < 4; j++) {
156 int sub_y_offset = y_offset + 2 * (j & 2);
158 IS_DIR(sub_mb_type, 0, 0),
159 IS_DIR(sub_mb_type, 0, 1),
166 for (list = sl->
list_count - 1; list >= 0; list--)
167 for (ref = 0; ref < 48 && nrefs[
list]; ref++) {
173 int pic_height = 16 * h->
mb_height >> ref_field_picture;
181 FFMIN((row >> 1) - !(row & 1),
185 FFMIN((row >> 1), pic_height - 1),
189 FFMIN(row * 2 + ref_field,
194 FFMIN(row, pic_height - 1),
198 FFMIN(row, pic_height - 1),
211 int src_x_offset,
int src_y_offset,
214 int pixel_shift,
int chroma_idc)
218 const int luma_xy = (mx & 3) + ((my & 3) << 2);
223 int extra_height = 0;
225 const int full_mx = mx >> 2;
226 const int full_my = my >> 2;
227 const int pic_width = 16 * h->
mb_width;
236 if (full_mx < 0 - extra_width ||
237 full_my < 0 - extra_height ||
238 full_mx + 16 > pic_width + extra_width ||
239 full_my + 16 > pic_height + extra_height) {
243 16 + 5, 16 + 5 , full_mx - 2,
244 full_my - 2, pic_width, pic_height);
256 if (chroma_idc == 3 ) {
263 full_mx - 2, full_my - 2,
264 pic_width, pic_height);
267 qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize);
277 full_mx - 2, full_my - 2,
278 pic_width, pic_height);
281 qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize);
287 ysh = 3 - (chroma_idc == 2 );
288 if (chroma_idc == 1 &&
MB_FIELD(sl)) {
291 emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
294 src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
296 src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
302 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
303 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
307 height >> (chroma_idc == 1 ),
308 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
313 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
314 pic_width >> 1, pic_height >> (chroma_idc == 1 ));
317 chroma_op(dest_cr, src_cr, sl->
mb_uvlinesize, height >> (chroma_idc == 1 ),
318 mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
326 int x_offset,
int y_offset,
331 int list0,
int list1,
332 int pixel_shift,
int chroma_idc)
337 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
338 if (chroma_idc == 3 ) {
339 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
340 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
341 }
else if (chroma_idc == 2 ) {
342 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
343 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
345 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
346 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
348 x_offset += 8 * sl->
mb_x;
353 mc_dir_part(h, sl, ref, n, square, height, delta, 0,
354 dest_y, dest_cb, dest_cr, x_offset, y_offset,
355 qpix_op, chroma_op, pixel_shift, chroma_idc);
358 chroma_op = chroma_avg;
363 mc_dir_part(h, sl, ref, n, square, height, delta, 1,
364 dest_y, dest_cb, dest_cr, x_offset, y_offset,
365 qpix_op, chroma_op, pixel_shift, chroma_idc);
374 int x_offset,
int y_offset,
381 int list0,
int list1,
382 int pixel_shift,
int chroma_idc)
386 dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
387 if (chroma_idc == 3 ) {
389 chroma_weight_avg = luma_weight_avg;
390 chroma_weight_op = luma_weight_op;
391 dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
392 dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
393 }
else if (chroma_idc == 2 ) {
395 dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
396 dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
398 chroma_height = height >> 1;
399 dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
400 dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
402 x_offset += 8 * sl->
mb_x;
405 if (list0 && list1) {
415 dest_y, dest_cb, dest_cr,
416 x_offset, y_offset, qpix_put, chroma_put,
417 pixel_shift, chroma_idc);
419 tmp_y, tmp_cb, tmp_cr,
420 x_offset, y_offset, qpix_put, chroma_put,
421 pixel_shift, chroma_idc);
425 int weight1 = 64 - weight0;
427 height, 5, weight0, weight1, 0);
430 chroma_height, 5, weight0, weight1, 0);
432 chroma_height, 5, weight0, weight1, 0);
435 luma_weight_avg(dest_y, tmp_y, sl->
mb_linesize, height,
442 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
448 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
457 int list = list1 ? 1 : 0;
460 mc_dir_part(h, sl, ref, n, square, height, delta, list,
461 dest_y, dest_cb, dest_cr, x_offset, y_offset,
462 qpix_put, chroma_put, pixel_shift, chroma_idc);
484 int list,
int pixel_shift,
494 int off = mx * (1<< pixel_shift) +
498 if (chroma_idc == 3 ) {
502 off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
511 int linesize,
int uvlinesize,
512 int xchg,
int chroma444,
513 int simple,
int pixel_shift)
534 deblock_topleft = (sl->
mb_x > 0);
538 src_y -= linesize + 1 + pixel_shift;
539 src_cb -= uvlinesize + 1 + pixel_shift;
540 src_cr -= uvlinesize + 1 + pixel_shift;
545 #define XCHG(a, b, xchg) \ 548 AV_SWAP64(b + 0, a + 0); \ 549 AV_SWAP64(b + 8, a + 8); \ 559 if (deblock_topleft) {
560 XCHG(top_border_m1 + (8 << pixel_shift),
561 src_y - (7 << pixel_shift), 1);
563 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
564 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
567 src_y + (17 << pixel_shift), 1);
571 if (deblock_topleft) {
572 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
573 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
575 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
576 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
577 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
578 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
580 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
581 XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
584 if (deblock_topleft) {
585 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
586 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
588 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
589 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
598 if (high_bit_depth) {
607 if (high_bit_depth) {
615 int mb_type,
int simple,
616 int transform_bypass,
618 const int *block_offset,
626 block_offset += 16 * p;
629 if (transform_bypass) {
636 for (i = 0; i < 16; i += 4) {
637 uint8_t *
const ptr = dest_y + block_offset[
i];
641 h->
hpc.
pred8x8l_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
644 (sl-> topleft_samples_available << i) & 0x8000,
651 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
652 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
654 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
659 if (transform_bypass) {
666 for (i = 0; i < 16; i++) {
667 uint8_t *
const ptr = dest_y + block_offset[
i];
671 h->
hpc.
pred4x4_add[dir](ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
679 if (!topright_avail) {
681 tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
682 topright = (
uint8_t *)&tr_high;
684 tr = ptr[3 - linesize] * 0x01010101
u;
688 topright = ptr + (4 << pixel_shift) - linesize;
695 if (nnz == 1 &&
dctcoef_get(sl->
mb, pixel_shift, i * 16 + p * 256))
696 idct_dc_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
698 idct_add(ptr, sl->
mb + (i * 16 + p * 256 << pixel_shift), linesize);
706 if (!transform_bypass)
711 static const uint8_t dc_mapping[16] = {
712 0 * 16, 1 * 16, 4 * 16, 5 * 16,
713 2 * 16, 3 * 16, 6 * 16, 7 * 16,
714 8 * 16, 9 * 16, 12 * 16, 13 * 16,
715 10 * 16, 11 * 16, 14 * 16, 15 * 16
717 for (i = 0; i < 16; i++)
719 pixel_shift, dc_mapping[i],
728 int mb_type,
int simple,
729 int transform_bypass,
731 const int *block_offset,
737 block_offset += 16 * p;
740 if (transform_bypass) {
745 sl->
mb + (p * 256 << pixel_shift),
748 for (i = 0; i < 16; i++)
752 sl->
mb + (i * 16 + p * 256 << pixel_shift),
757 sl->
mb + (p * 256 << pixel_shift),
761 }
else if (sl->
cbp & 15) {
762 if (transform_bypass) {
763 const int di =
IS_8x8DCT(mb_type) ? 4 : 1;
766 for (i = 0; i < 16; i += di)
769 sl->
mb + (i * 16 + p * 256 << pixel_shift),
774 sl->
mb + (p * 256 << pixel_shift),
779 sl->
mb + (p * 256 << pixel_shift),
801 const int mb_xy = sl->
mb_xy;
803 int is_complex = CONFIG_SMALL || sl->
is_complex ||
808 hl_decode_mb_444_complex(h, sl);
810 hl_decode_mb_444_simple_8(h, sl);
811 }
else if (is_complex) {
812 hl_decode_mb_complex(h, sl);
814 hl_decode_mb_simple_16(h, sl);
816 hl_decode_mb_simple_8(h, sl);
static void await_references(const H264Context *h, H264SliceContext *sl)
Wait until all reference frames are available for MC operations.
void(* h264_biweight_func)(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
void(* pred8x8l_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
int16_t mv_cache[2][5 *8][2]
Motion vector cache.
static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, int list, int pixel_shift, int chroma_idc)
unsigned int topleft_samples_available
int chroma_weight[48][2][2][2]
void(* pred8x8l_filter_add[2])(uint8_t *pix, int16_t *block, int topleft, int topright, ptrdiff_t stride)
void(* prefetch)(uint8_t *buf, ptrdiff_t stride, int h)
Prefetch memory into cache (if supported by hardware).
the pkt_dts and pkt_pts fields in AVFrame will work as usual. Restrictions on codec whose streams don't reset across will not work because their bitstreams cannot be decoded in parallel. * The contents of buffers must not be read before ff_thread_await_progress() has been called on them. reget_buffer() and buffer age optimizations no longer work. * The contents of buffers must not be written to after ff_thread_report_progress() has been called on them. This includes draw_edges(). Porting codecs to frame threading
static void idct_add(uint8_t *dst, int stride, const uint8_t *src, int in_linesize, int *block)
void(* pred16x16_add[3])(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
void(* emulated_edge_mc)(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_linesize, ptrdiff_t src_linesize, int block_w, int block_h, int src_x, int src_y, int w, int h)
Copy a rectangular area of samples to a temporary buffer and replicate the border samples...
uint16_t sub_mb_type[4]
as a DCT coefficient is int32_t in high depth, we need to reserve twice the space.
The exact code depends on how similar the blocks are and how related they are to the block
#define av_assert2(cond)
assert() equivalent for assertions that lie in speed-critical code.
int field_picture
whether or not picture was encoded in separate fields
void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl)
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this, just let it vf offset
Multithreading support functions.
GLsizei GLboolean const GLfloat * value
#define u(width, name, range_min, range_max)
uint8_t(*[2] top_borders)[(16 *3)*2]
#define IS_DIR(a, part, list)
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
uint32_t(*[6] dequant4_coeff)[16]
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
int luma_weight[48][2][2]
void(* pred4x4[9+3+3])(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, int list0, int list1, int pixel_shift, int chroma_idc)
#define AV_CODEC_FLAG_GRAY
Only decode/encode grayscale.
unsigned int topright_samples_available
H.264 parameter set handling.
int chroma_log2_weight_denom
void(* qpel_mc_func)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
int8_t intra4x4_pred_mode_cache[5 *8]
void(* h264_chroma_mc_func)(uint8_t *dst, uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)
int deblocking_filter
disable_deblocking_filter_idc with 1 <-> 0
void(* h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride)
int16_t mb_luma_dc[3][16 *2]
as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too lar...
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, const qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, const qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, int list0, int list1, int pixel_shift, int chroma_idc)
uint16_t * slice_table
slice_table_base + 2*mb_stride + 1
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
int luma_log2_weight_denom
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
H.264 / AVC / MPEG-4 part10 codec.
void(* pred4x4_add[2])(uint8_t *pix, int16_t *block, ptrdiff_t stride)
#define LUMA_DC_BLOCK_INDEX
uint8_t * edge_emu_buffer
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, int index, int value)
Libavcodec external API header.
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure. Format for each input and each output the list of supported formats. For video that means pixel format. For audio that means channel sample they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list
uint8_t * data
The data buffer.
int implicit_weight[48][48][2]
static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, int index)
static const uint8_t scan8[16 *3+3]
void(* pred16x16[4+3+2])(uint8_t *src, ptrdiff_t stride)
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
uint8_t non_zero_count_cache[15 *8]
non zero coeff count cache.
int pixel_shift
0 for 8-bit H.264, 1 for high-bit-depth H.264
void(* h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
void(* h264_luma_dc_dequant_idct)(int16_t *output, int16_t *input, int qmul)
void(* h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride)
ptrdiff_t mb_linesize
may be equal to s->linesize or s->linesize * 2, for mbaff
static int get_lowest_part_list_y(H264SliceContext *sl, int n, int height, int y_offset, int list)
static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int chroma444, int simple, int pixel_shift)
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, H264SliceContext *sl, int mb_type, int simple, int transform_bypass, int pixel_shift, const int *block_offset, int linesize, uint8_t *dest_y, int p)
common internal and external API header
static int ref[MAX_W *MAX_W]
static void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, int16_t refs[2][48], int n, int height, int y_offset, int list0, int list1, int *nrefs)
int8_t ref_cache[2][5 *8]
static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, H264Ref *pic, int n, int square, int height, int delta, int list, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, const qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, int pixel_shift, int chroma_idc)
H264Ref ref_list[2][48]
0..15: frame refs, 16..47: mbaff field refs.
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
uint8_t * bipred_scratchpad
void(* pred8x8l[9+3])(uint8_t *src, int topleft, int topright, ptrdiff_t stride)