/* VP9 frame sync code: the three marker bytes 0x49 0x83 0x42 that the
 * uncompressed frame header must begin with (VP9 bitstream spec, frame
 * sync section). Used to validate the start of each frame header. */
39 #define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
109 f->segmentation_map =
NULL;
110 f->hwaccel_picture_private =
NULL;
122 sz = 64 *
s->sb_cols *
s->sb_rows;
123 if (sz !=
s->frame_extradata_pool_size) {
126 if (!
s->frame_extradata_pool) {
127 s->frame_extradata_pool_size = 0;
130 s->frame_extradata_pool_size = sz;
136 memset(
f->extradata->data, 0,
f->extradata->size);
138 f->segmentation_map =
f->extradata->data;
146 if (!
f->hwaccel_priv_buf)
148 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
175 if (
src->hwaccel_picture_private) {
/* Upper bound on the number of hardware-acceleration pixel formats that can
 * be appended to the get_format list for one software pixel format.
 * D3D11VA is counted twice because it can expose two hwaccel variants
 * (the legacy D3D11VA API and the newer D3D11 one) — NOTE(review): inferred
 * from the "* 2" factor and the separate CONFIG_VP9_D3D11VA2_HWACCEL guard
 * visible later in this file; confirm against the full pix_fmt setup code. */
191 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
192 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
193 CONFIG_VP9_NVDEC_HWACCEL + \
194 CONFIG_VP9_VAAPI_HWACCEL + \
195 CONFIG_VP9_VDPAU_HWACCEL)
199 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
204 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
208 switch (
s->pix_fmt) {
211 #if CONFIG_VP9_DXVA2_HWACCEL
214 #if CONFIG_VP9_D3D11VA_HWACCEL
218 #if CONFIG_VP9_NVDEC_HWACCEL
221 #if CONFIG_VP9_VAAPI_HWACCEL
224 #if CONFIG_VP9_VDPAU_HWACCEL
229 #if CONFIG_VP9_NVDEC_HWACCEL
232 #if CONFIG_VP9_VAAPI_HWACCEL
235 #if CONFIG_VP9_VDPAU_HWACCEL
241 *fmtp++ =
s->pix_fmt;
249 s->gf_fmt =
s->pix_fmt;
257 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
260 s->last_fmt =
s->pix_fmt;
261 s->sb_cols = (
w + 63) >> 6;
262 s->sb_rows = (
h + 63) >> 6;
263 s->cols = (
w + 7) >> 3;
264 s->rows = (
h + 7) >> 3;
/* Carve a sub-buffer of s->sb_cols * (n) elements for `var` out of the single
 * allocation pointed to by `p`, then advance `p` past it. Lets one av_malloc
 * serve several per-superblock-column arrays.
 * NOTE(review): this is a multi-statement macro without a do{}while(0) wrapper,
 * so it must never be used as the body of an unbraced if/else — the call sites
 * are not all visible in this chunk; verify they are plain statements. */
267 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
271 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
272 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
295 for (
i = 0;
i <
s->active_tile_cols;
i++)
299 if (
s->s.h.bpp !=
s->last_bpp) {
302 s->last_bpp =
s->s.h.bpp;
312 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
315 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
319 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
320 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
322 int sbs =
s->sb_cols *
s->sb_rows;
325 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
326 16 * 16 + 2 * chroma_eobs) * sbs);
327 if (!
td->b_base || !
td->block_base)
329 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
330 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
331 td->eob_base = (
uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
332 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
333 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
337 if (!
td->block_structure)
341 for (
i = 1;
i <
s->active_tile_cols;
i++)
344 for (
i = 0;
i <
s->active_tile_cols;
i++) {
346 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
347 16 * 16 + 2 * chroma_eobs);
348 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
350 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
351 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
352 s->td[
i].eob_base = (
uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
353 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
354 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
358 if (!
s->td[
i].block_structure)
363 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
380 return m - ((v + 1) >> 1);
387 static const uint8_t inv_map_table[255] = {
388 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
389 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
390 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
391 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
392 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
393 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
394 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
395 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
396 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
397 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
398 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
399 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
400 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
401 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
402 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
403 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
404 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
405 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
453 s->s.h.bpp = 8 +
bits * 2;
454 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
460 s->ss_h =
s->ss_v = 0;
474 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
486 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
497 s->ss_h =
s->ss_v = 1;
498 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
509 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
535 s->last_keyframe =
s->s.h.keyframe;
538 last_invisible =
s->s.h.invisible;
541 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
543 if (
s->s.h.keyframe) {
551 s->s.h.refreshrefmask = 0xff;
557 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
558 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
559 if (
s->s.h.intraonly) {
568 s->ss_h =
s->ss_v = 1;
571 s->bytesperpixel = 1;
584 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
586 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
588 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
589 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
590 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
591 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
596 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
597 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
599 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
600 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
602 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
603 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
611 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
618 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
619 s->s.h.signbias[0] !=
s->s.h.signbias[2];
620 if (
s->s.h.allowcompinter) {
621 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
622 s->s.h.fixcompref = 2;
623 s->s.h.varcompref[0] = 0;
624 s->s.h.varcompref[1] = 1;
625 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
626 s->s.h.fixcompref = 1;
627 s->s.h.varcompref[0] = 0;
628 s->s.h.varcompref[1] = 2;
630 s->s.h.fixcompref = 0;
631 s->s.h.varcompref[0] = 1;
632 s->s.h.varcompref[1] = 2;
637 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
638 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
640 if (
s->s.h.keyframe ||
s->s.h.intraonly)
641 s->s.h.framectxid = 0;
644 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
646 s->s.h.lf_delta.ref[0] = 1;
647 s->s.h.lf_delta.ref[1] = 0;
648 s->s.h.lf_delta.ref[2] = -1;
649 s->s.h.lf_delta.ref[3] = -1;
650 s->s.h.lf_delta.mode[0] = 0;
651 s->s.h.lf_delta.mode[1] = 0;
652 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
658 if (
s->s.h.filter.sharpness != sharp) {
659 for (
i = 1;
i <= 63;
i++) {
663 limit >>= (sharp + 3) >> 2;
664 limit =
FFMIN(limit, 9 - sharp);
666 limit =
FFMAX(limit, 1);
668 s->filter_lut.lim_lut[
i] = limit;
669 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) + limit;
672 s->s.h.filter.sharpness = sharp;
673 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
674 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
675 for (
i = 0;
i < 4;
i++)
678 for (
i = 0;
i < 2;
i++)
689 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
690 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
695 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
696 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
697 for (
i = 0;
i < 7;
i++)
700 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
701 for (
i = 0;
i < 3;
i++)
707 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
708 for (
i = 0;
i < 8;
i++) {
709 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
711 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
713 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
714 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
715 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
721 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
722 int qyac, qydc, quvac, quvdc, lflvl, sh;
724 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
725 if (
s->s.h.segmentation.absolute_vals)
730 qyac =
s->s.h.yac_qi;
742 sh =
s->s.h.filter.level >= 32;
743 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
744 if (
s->s.h.segmentation.absolute_vals)
747 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
749 lflvl =
s->s.h.filter.level;
751 if (
s->s.h.lf_delta.enabled) {
752 s->s.h.segmentation.feat[
i].lflvl[0][0] =
753 s->s.h.segmentation.feat[
i].lflvl[0][1] =
755 for (j = 1; j < 4; j++) {
756 s->s.h.segmentation.feat[
i].lflvl[j][0] =
758 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
759 s->s.h.segmentation.feat[
i].lflvl[j][1] =
761 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
764 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
765 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
775 for (
s->s.h.tiling.log2_tile_cols = 0;
776 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
777 s->s.h.tiling.log2_tile_cols++) ;
778 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
780 while (
max >
s->s.h.tiling.log2_tile_cols) {
782 s->s.h.tiling.log2_tile_cols++;
787 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
788 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
793 for (
i = 0;
i <
s->active_tile_cols;
i++)
798 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
801 s->s.h.tiling.tile_cols : 1;
806 n_range_coders =
s->s.h.tiling.tile_cols;
813 for (
i = 0;
i <
s->active_tile_cols;
i++) {
816 rc += n_range_coders;
821 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
822 int valid_ref_frame = 0;
823 for (
i = 0;
i < 3;
i++) {
825 int refw =
ref->width, refh =
ref->height;
829 "Ref pixfmt (%s) did not match current frame (%s)",
833 }
else if (refw ==
w && refh ==
h) {
834 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
838 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
840 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
845 s->mvscale[
i][0] = (refw << 14) /
w;
846 s->mvscale[
i][1] = (refh << 14) /
h;
847 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
848 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
852 if (!valid_ref_frame) {
853 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
858 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
859 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
869 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
876 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
880 if (size2 >
size - (data2 -
data)) {
893 for (
i = 0;
i <
s->active_tile_cols;
i++) {
894 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
895 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
896 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
898 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
900 s->td[
i].nb_block_structure = 0;
906 s->prob.p =
s->prob_ctx[
c].p;
909 if (
s->s.h.lossless) {
913 if (
s->s.h.txfmmode == 3)
917 for (
i = 0;
i < 2;
i++)
920 for (
i = 0;
i < 2;
i++)
921 for (j = 0; j < 2; j++)
923 s->prob.p.tx16p[
i][j] =
925 for (
i = 0;
i < 2;
i++)
926 for (j = 0; j < 3; j++)
928 s->prob.p.tx32p[
i][j] =
934 for (
i = 0;
i < 4;
i++) {
937 for (j = 0; j < 2; j++)
938 for (k = 0; k < 2; k++)
939 for (l = 0; l < 6; l++)
940 for (m = 0; m < 6; m++) {
941 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
943 if (m >= 3 && l == 0)
945 for (n = 0; n < 3; n++) {
954 for (j = 0; j < 2; j++)
955 for (k = 0; k < 2; k++)
956 for (l = 0; l < 6; l++)
957 for (m = 0; m < 6; m++) {
958 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
966 if (
s->s.h.txfmmode ==
i)
971 for (
i = 0;
i < 3;
i++)
974 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
975 for (
i = 0;
i < 7;
i++)
976 for (j = 0; j < 3; j++)
978 s->prob.p.mv_mode[
i][j] =
982 for (
i = 0;
i < 4;
i++)
983 for (j = 0; j < 2; j++)
985 s->prob.p.filter[
i][j] =
988 for (
i = 0;
i < 4;
i++)
992 if (
s->s.h.allowcompinter) {
994 if (
s->s.h.comppredmode)
997 for (
i = 0;
i < 5;
i++)
1006 for (
i = 0;
i < 5;
i++) {
1008 s->prob.p.single_ref[
i][0] =
1011 s->prob.p.single_ref[
i][1] =
1017 for (
i = 0;
i < 5;
i++)
1019 s->prob.p.comp_ref[
i] =
1023 for (
i = 0;
i < 4;
i++)
1024 for (j = 0; j < 9; j++)
1026 s->prob.p.y_mode[
i][j] =
1029 for (
i = 0;
i < 4;
i++)
1030 for (j = 0; j < 4; j++)
1031 for (k = 0; k < 3; k++)
1033 s->prob.p.partition[3 -
i][j][k] =
1035 s->prob.p.partition[3 -
i][j][k]);
1038 for (
i = 0;
i < 3;
i++)
1042 for (
i = 0;
i < 2;
i++) {
1044 s->prob.p.mv_comp[
i].sign =
1047 for (j = 0; j < 10; j++)
1049 s->prob.p.mv_comp[
i].classes[j] =
1053 s->prob.p.mv_comp[
i].class0 =
1056 for (j = 0; j < 10; j++)
1058 s->prob.p.mv_comp[
i].bits[j] =
1062 for (
i = 0;
i < 2;
i++) {
1063 for (j = 0; j < 2; j++)
1064 for (k = 0; k < 3; k++)
1066 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1069 for (j = 0; j < 3; j++)
1071 s->prob.p.mv_comp[
i].fp[j] =
1075 if (
s->s.h.highprecisionmvs) {
1076 for (
i = 0;
i < 2;
i++) {
1078 s->prob.p.mv_comp[
i].class0_hp =
1082 s->prob.p.mv_comp[
i].hp =
1088 return (data2 -
data) + size2;
1092 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1095 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1096 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1098 s->prob.p.partition[bl][
c];
1100 ptrdiff_t hbs = 4 >> bl;
1102 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1103 int bytesperpixel =
s->bytesperpixel;
1108 }
else if (col + hbs < s->cols) {
1109 if (row + hbs < s->rows) {
1117 yoff += hbs * 8 * y_stride;
1118 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1123 yoff += hbs * 8 * bytesperpixel;
1124 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1128 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1130 yoff + 8 * hbs * bytesperpixel,
1131 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1132 yoff += hbs * 8 * y_stride;
1133 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1134 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1136 yoff + 8 * hbs * bytesperpixel,
1137 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1144 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1146 yoff + 8 * hbs * bytesperpixel,
1147 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1152 }
else if (row + hbs < s->rows) {
1155 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1156 yoff += hbs * 8 * y_stride;
1157 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1158 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1165 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1167 td->counts.partition[bl][
c][bp]++;
1171 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1175 ptrdiff_t hbs = 4 >> bl;
1177 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1178 int bytesperpixel =
s->bytesperpixel;
1183 }
else if (
td->b->bl == bl) {
1186 yoff += hbs * 8 * y_stride;
1187 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1189 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1190 yoff += hbs * 8 * bytesperpixel;
1191 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1196 if (col + hbs < s->cols) {
1197 if (row + hbs < s->rows) {
1198 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1199 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1200 yoff += hbs * 8 * y_stride;
1201 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1204 yoff + 8 * hbs * bytesperpixel,
1205 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1207 yoff += hbs * 8 * bytesperpixel;
1208 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1211 }
else if (row + hbs < s->rows) {
1212 yoff += hbs * 8 * y_stride;
1213 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1221 int sb_start = ( idx * n) >> log2_n;
1222 int sb_end = ((idx + 1) * n) >> log2_n;
1223 *start =
FFMIN(sb_start, n) << 3;
1224 *end =
FFMIN(sb_end, n) << 3;
1232 for (
i = 0;
i <
s->active_tile_cols;
i++)
1241 for (
i = 0;
i < 3;
i++) {
1246 for (
i = 0;
i < 8;
i++) {
1264 int row, col, tile_row, tile_col,
ret;
1266 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1268 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1271 ls_y =
f->linesize[0];
1272 ls_uv =
f->linesize[1];
1273 bytesperpixel =
s->bytesperpixel;
1276 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1278 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1280 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1283 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1284 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1291 if (tile_size >
size) {
1306 for (row = tile_row_start; row < tile_row_end;
1307 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1309 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1311 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1313 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1314 td->tile_col_start = tile_col_start;
1316 memset(
td->left_partition_ctx, 0, 8);
1317 memset(
td->left_skip_ctx, 0, 8);
1318 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1323 memset(
td->left_y_nnz_ctx, 0, 16);
1324 memset(
td->left_uv_nnz_ctx, 0, 32);
1325 memset(
td->left_segpred_ctx, 0, 8);
1327 td->c = &
td->c_b[tile_col];
1330 for (col = tile_col_start;
1332 col += 8, yoff2 += 64 * bytesperpixel,
1333 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1337 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1358 if (row + 8 <
s->rows) {
1359 memcpy(
s->intra_pred_data[0],
1360 f->data[0] + yoff + 63 * ls_y,
1361 8 *
s->cols * bytesperpixel);
1362 memcpy(
s->intra_pred_data[1],
1363 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1364 8 *
s->cols * bytesperpixel >>
s->ss_h);
1365 memcpy(
s->intra_pred_data[2],
1366 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1367 8 *
s->cols * bytesperpixel >>
s->ss_h);
1371 if (
s->s.h.filter.level) {
1374 lflvl_ptr =
s->lflvl;
1375 for (col = 0; col <
s->cols;
1376 col += 8, yoff2 += 64 * bytesperpixel,
1377 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1394 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1399 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1400 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1401 unsigned tile_cols_len;
1402 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1407 ls_y =
f->linesize[0];
1408 ls_uv =
f->linesize[1];
1411 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1412 td->tile_col_start = tile_col_start;
1413 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1414 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1415 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1417 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1419 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1421 td->c = &
td->c_b[tile_row];
1422 for (row = tile_row_start; row < tile_row_end;
1423 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1424 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1425 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1427 memset(
td->left_partition_ctx, 0, 8);
1428 memset(
td->left_skip_ctx, 0, 8);
1429 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1434 memset(
td->left_y_nnz_ctx, 0, 16);
1435 memset(
td->left_uv_nnz_ctx, 0, 32);
1436 memset(
td->left_segpred_ctx, 0, 8);
1438 for (col = tile_col_start;
1440 col += 8, yoff2 += 64 * bytesperpixel,
1441 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1444 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1451 tile_cols_len = tile_col_end - tile_col_start;
1452 if (row + 8 <
s->rows) {
1453 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1454 f->data[0] + yoff + 63 * ls_y,
1455 8 * tile_cols_len * bytesperpixel);
1456 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1457 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1458 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1459 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1460 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1461 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1464 vp9_report_tile_progress(
s, row >> 3, 1);
1474 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1476 int bytesperpixel =
s->bytesperpixel, col,
i;
1480 ls_y =
f->linesize[0];
1481 ls_uv =
f->linesize[1];
1483 for (
i = 0;
i <
s->sb_rows;
i++) {
1484 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1486 if (
s->s.h.filter.level) {
1487 yoff = (ls_y * 64)*
i;
1488 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1489 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1490 for (col = 0; col <
s->cols;
1491 col += 8, yoff += 64 * bytesperpixel,
1492 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1505 unsigned int tile, nb_blocks = 0;
1507 if (
s->s.h.segmentation.enabled) {
1508 for (tile = 0; tile <
s->active_tile_cols; tile++)
1509 nb_blocks +=
s->td[tile].nb_block_structure;
1517 par->
qp =
s->s.h.yac_qi;
1518 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1519 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1520 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1521 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1522 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1525 unsigned int block = 0;
1526 unsigned int tile, block_tile;
1528 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1531 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1533 unsigned int row =
td->block_structure[block_tile].row;
1534 unsigned int col =
td->block_structure[block_tile].col;
1535 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1539 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1540 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1542 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1543 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1544 if (
s->s.h.segmentation.absolute_vals)
1545 b->delta_qp -= par->
qp;
1562 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1567 }
else if (
ret == 0) {
1568 if (!
s->s.refs[
ref].f->buf[0]) {
1581 for (
i = 0;
i < 8;
i++) {
1582 if (
s->next_refs[
i].f->buf[0])
1584 if (
s->s.refs[
i].f->buf[0] &&
1594 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1597 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1603 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1611 f->key_frame =
s->s.h.keyframe;
1621 for (
i = 0;
i < 8;
i++) {
1622 if (
s->next_refs[
i].f->buf[0])
1624 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1626 }
else if (
s->s.refs[
i].f->buf[0]) {
1647 memset(
s->above_partition_ctx, 0,
s->cols);
1648 memset(
s->above_skip_ctx, 0,
s->cols);
1649 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1650 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1654 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1655 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1656 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1657 memset(
s->above_segpred_ctx, 0,
s->cols);
1662 "Failed to allocate block buffers\n");
1665 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1668 for (
i = 0;
i < 4;
i++) {
1669 for (j = 0; j < 2; j++)
1670 for (k = 0; k < 2; k++)
1671 for (l = 0; l < 6; l++)
1672 for (m = 0; m < 6; m++)
1673 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1674 s->prob.coef[
i][j][k][l][m], 3);
1675 if (
s->s.h.txfmmode ==
i)
1678 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1680 }
else if (!
s->s.h.refreshctx) {
1686 for (
i = 0;
i <
s->sb_rows;
i++)
1692 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1693 s->td[
i].b =
s->td[
i].b_base;
1694 s->td[
i].block =
s->td[
i].block_base;
1695 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1696 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1697 s->td[
i].eob =
s->td[
i].eob_base;
1698 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1699 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1700 s->td[
i].error_info = 0;
1705 int tile_row, tile_col;
1709 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1710 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1713 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1714 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1721 if (tile_size >
size)
1746 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1747 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1748 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1750 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1754 }
while (
s->pass++ == 1);
1757 if (
s->td->error_info < 0) {
1759 s->td->error_info = 0;
1770 for (
i = 0;
i < 8;
i++) {
1771 if (
s->s.refs[
i].f->buf[0])
1773 if (
s->next_refs[
i].f->buf[0] &&
1778 if (!
s->s.h.invisible) {
1792 for (
i = 0;
i < 3;
i++)
1794 for (
i = 0;
i < 8;
i++)
1803 for (
i = 0;
i < 3;
i++) {
1805 if (!
s->s.frames[
i].tf.f) {
1811 for (
i = 0;
i < 8;
i++) {
1814 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f) {
1829 s->s.h.filter.sharpness = -1;
1840 for (
i = 0;
i < 3;
i++) {
1841 if (
s->s.frames[
i].tf.f->buf[0])
1843 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1848 for (
i = 0;
i < 8;
i++) {
1849 if (
s->s.refs[
i].f->buf[0])
1851 if (ssrc->next_refs[
i].f->buf[0]) {
1857 s->s.h.invisible = ssrc->s.h.invisible;
1858 s->s.h.keyframe = ssrc->s.h.keyframe;
1859 s->s.h.intraonly = ssrc->s.h.intraonly;
1860 s->ss_v = ssrc->ss_v;
1861 s->ss_h = ssrc->ss_h;
1862 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1863 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1864 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1865 s->bytesperpixel = ssrc->bytesperpixel;
1866 s->gf_fmt = ssrc->gf_fmt;
1869 s->s.h.bpp = ssrc->s.h.bpp;
1870 s->bpp_index = ssrc->bpp_index;
1871 s->pix_fmt = ssrc->pix_fmt;
1872 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1873 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1874 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1875 sizeof(
s->s.h.segmentation.feat));
1896 .bsfs =
"vp9_superframe_split",
1898 #if CONFIG_VP9_DXVA2_HWACCEL
1901 #if CONFIG_VP9_D3D11VA_HWACCEL
1904 #if CONFIG_VP9_D3D11VA2_HWACCEL
1907 #if CONFIG_VP9_NVDEC_HWACCEL
1910 #if CONFIG_VP9_VAAPI_HWACCEL
1913 #if CONFIG_VP9_VDPAU_HWACCEL