24 #include "config_components.h"
/* 24-bit frame sync code (bytes 0x49 0x83 0x42) that starts every VP9
 * uncompressed frame header; used to validate the bitstream. */
#define VP9_SYNCCODE 0x498342
102 f->segmentation_map =
NULL;
114 sz = 64 *
s->sb_cols *
s->sb_rows;
115 if (sz !=
s->frame_extradata_pool_size) {
119 if (!
s->frame_extradata_pool) {
120 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
132 f->segmentation_map =
f->extradata;
152 dst->segmentation_map =
src->segmentation_map;
154 dst->uses_2pass =
src->uses_2pass;
157 src->hwaccel_picture_private);
/* Upper bound on the number of hardware pixel formats that can be offered
 * (in addition to the software format) when building the get_format() list.
 * Each CONFIG_* flag is 0 or 1, so only compiled-in hwaccels count.
 * NOTE(review): D3D11VA is counted twice — presumably because it can expose
 * two pixel-format entries; confirm against the fmtp list below. */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_D3D12VA_HWACCEL + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
172 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
177 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
181 switch (
s->pix_fmt) {
184 #if CONFIG_VP9_DXVA2_HWACCEL
187 #if CONFIG_VP9_D3D11VA_HWACCEL
191 #if CONFIG_VP9_D3D12VA_HWACCEL
194 #if CONFIG_VP9_NVDEC_HWACCEL
197 #if CONFIG_VP9_VAAPI_HWACCEL
200 #if CONFIG_VP9_VDPAU_HWACCEL
203 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
208 #if CONFIG_VP9_NVDEC_HWACCEL
211 #if CONFIG_VP9_VAAPI_HWACCEL
214 #if CONFIG_VP9_VDPAU_HWACCEL
221 #if CONFIG_VP9_VAAPI_HWACCEL
228 #if CONFIG_VP9_VAAPI_HWACCEL
234 *fmtp++ =
s->pix_fmt;
242 s->gf_fmt =
s->pix_fmt;
250 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
253 s->last_fmt =
s->pix_fmt;
254 s->sb_cols = (
w + 63) >> 6;
255 s->sb_rows = (
h + 63) >> 6;
256 s->cols = (
w + 7) >> 3;
257 s->rows = (
h + 7) >> 3;
/* Carve the next sub-buffer out of the single arena pointed to by p:
 * point var at the current position (cast to the requested pointer type),
 * then advance p by s->sb_cols * n elements of *var.
 * Wrapped in do/while(0) so the two statements cannot be separated when
 * the macro is used unbraced inside an if/else (CERT PRE10-C). */
#define assign(var, type, n) do { \
        var = (type) p; \
        p += s->sb_cols * (n) * sizeof(*var); \
    } while (0)
264 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
265 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
268 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
269 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
270 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
271 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
272 assign(
s->above_mode_ctx, uint8_t *, 16);
274 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
275 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
276 assign(
s->above_partition_ctx, uint8_t *, 8);
277 assign(
s->above_skip_ctx, uint8_t *, 8);
278 assign(
s->above_txfm_ctx, uint8_t *, 8);
279 assign(
s->above_segpred_ctx, uint8_t *, 8);
280 assign(
s->above_intra_ctx, uint8_t *, 8);
281 assign(
s->above_comp_ctx, uint8_t *, 8);
282 assign(
s->above_ref_ctx, uint8_t *, 8);
283 assign(
s->above_filter_ctx, uint8_t *, 8);
288 for (
i = 0;
i <
s->active_tile_cols;
i++)
292 if (
s->s.h.bpp !=
s->last_bpp) {
295 s->last_bpp =
s->s.h.bpp;
305 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
312 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
313 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
315 int sbs =
s->sb_cols *
s->sb_rows;
319 16 * 16 + 2 * chroma_eobs) * sbs);
334 for (
i = 1;
i <
s->active_tile_cols;
i++)
337 for (
i = 0;
i <
s->active_tile_cols;
i++) {
339 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
340 16 * 16 + 2 * chroma_eobs);
341 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
343 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
344 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
345 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
346 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
347 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
351 if (!
s->td[
i].block_structure)
356 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
373 return m - ((v + 1) >> 1);
380 static const uint8_t inv_map_table[255] = {
381 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
382 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
383 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
384 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
385 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
386 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
387 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
388 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
389 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
390 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
391 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
392 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
393 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
394 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
395 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
396 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
397 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
398 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
446 s->s.h.bpp = 8 +
bits * 2;
447 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
453 s->ss_h =
s->ss_v = 0;
467 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
479 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
490 s->ss_h =
s->ss_v = 1;
491 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
502 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
504 const uint8_t *data2;
528 s->last_keyframe =
s->s.h.keyframe;
531 last_invisible =
s->s.h.invisible;
534 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
536 if (
s->s.h.keyframe) {
544 s->s.h.refreshrefmask = 0xff;
550 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
551 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
552 if (
s->s.h.intraonly) {
561 s->ss_h =
s->ss_v = 1;
564 s->bytesperpixel = 1;
577 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
579 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
581 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
582 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
583 !
s->s.refs[
s->s.h.refidx[1]].f ||
584 !
s->s.refs[
s->s.h.refidx[2]].f) {
589 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
590 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
592 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
593 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
595 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
596 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
604 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
612 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
613 s->s.h.signbias[0] !=
s->s.h.signbias[2];
614 if (
s->s.h.allowcompinter) {
615 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
616 s->s.h.fixcompref = 2;
617 s->s.h.varcompref[0] = 0;
618 s->s.h.varcompref[1] = 1;
619 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
620 s->s.h.fixcompref = 1;
621 s->s.h.varcompref[0] = 0;
622 s->s.h.varcompref[1] = 2;
624 s->s.h.fixcompref = 0;
625 s->s.h.varcompref[0] = 1;
626 s->s.h.varcompref[1] = 2;
631 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
632 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
634 if (
s->s.h.keyframe ||
s->s.h.intraonly)
635 s->s.h.framectxid = 0;
638 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
640 s->s.h.lf_delta.ref[0] = 1;
641 s->s.h.lf_delta.ref[1] = 0;
642 s->s.h.lf_delta.ref[2] = -1;
643 s->s.h.lf_delta.ref[3] = -1;
644 s->s.h.lf_delta.mode[0] = 0;
645 s->s.h.lf_delta.mode[1] = 0;
646 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
652 if (
s->s.h.filter.sharpness != sharp) {
653 for (
i = 1;
i <= 63;
i++) {
657 limit >>= (sharp + 3) >> 2;
662 s->filter_lut.lim_lut[
i] =
limit;
663 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
666 s->s.h.filter.sharpness = sharp;
667 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
668 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
669 for (
i = 0;
i < 4;
i++)
672 for (
i = 0;
i < 2;
i++)
683 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
684 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
689 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
690 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
691 for (
i = 0;
i < 7;
i++)
694 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
695 for (
i = 0;
i < 3;
i++)
701 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
702 for (
i = 0;
i < 8;
i++) {
703 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
705 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
707 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
708 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
709 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
716 s->s.h.segmentation.temporal = 0;
717 s->s.h.segmentation.update_map = 0;
721 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
722 int qyac, qydc, quvac, quvdc, lflvl, sh;
724 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
725 if (
s->s.h.segmentation.absolute_vals)
730 qyac =
s->s.h.yac_qi;
742 sh =
s->s.h.filter.level >= 32;
743 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
744 if (
s->s.h.segmentation.absolute_vals)
747 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
749 lflvl =
s->s.h.filter.level;
751 if (
s->s.h.lf_delta.enabled) {
752 s->s.h.segmentation.feat[
i].lflvl[0][0] =
753 s->s.h.segmentation.feat[
i].lflvl[0][1] =
755 for (j = 1; j < 4; j++) {
756 s->s.h.segmentation.feat[
i].lflvl[j][0] =
758 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
759 s->s.h.segmentation.feat[
i].lflvl[j][1] =
761 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
764 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
765 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
775 for (
s->s.h.tiling.log2_tile_cols = 0;
776 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
777 s->s.h.tiling.log2_tile_cols++) ;
778 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
780 while (
max >
s->s.h.tiling.log2_tile_cols) {
782 s->s.h.tiling.log2_tile_cols++;
787 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
788 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
793 for (
i = 0;
i <
s->active_tile_cols;
i++)
798 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
800 s->s.h.tiling.tile_cols : 1;
805 n_range_coders =
s->s.h.tiling.tile_cols;
812 for (
i = 0;
i <
s->active_tile_cols;
i++) {
815 rc += n_range_coders;
820 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
821 int valid_ref_frame = 0;
822 for (
i = 0;
i < 3;
i++) {
824 int refw =
ref->width, refh =
ref->height;
828 "Ref pixfmt (%s) did not match current frame (%s)",
832 }
else if (refw ==
w && refh ==
h) {
833 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
837 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
839 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
844 s->mvscale[
i][0] = (refw << 14) /
w;
845 s->mvscale[
i][1] = (refh << 14) /
h;
846 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
847 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
851 if (!valid_ref_frame) {
852 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
857 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
858 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
868 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
875 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
879 if (size2 >
size - (data2 -
data)) {
892 for (
i = 0;
i <
s->active_tile_cols;
i++) {
893 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
894 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
895 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
897 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
899 s->td[
i].nb_block_structure = 0;
905 s->prob.p =
s->prob_ctx[
c].p;
908 if (
s->s.h.lossless) {
912 if (
s->s.h.txfmmode == 3)
916 for (
i = 0;
i < 2;
i++)
919 for (
i = 0;
i < 2;
i++)
920 for (j = 0; j < 2; j++)
922 s->prob.p.tx16p[
i][j] =
924 for (
i = 0;
i < 2;
i++)
925 for (j = 0; j < 3; j++)
927 s->prob.p.tx32p[
i][j] =
933 for (
i = 0;
i < 4;
i++) {
934 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
936 for (j = 0; j < 2; j++)
937 for (k = 0; k < 2; k++)
938 for (l = 0; l < 6; l++)
939 for (m = 0; m < 6; m++) {
940 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
941 uint8_t *
r =
ref[j][k][l][m];
942 if (m >= 3 && l == 0)
944 for (n = 0; n < 3; n++) {
953 for (j = 0; j < 2; j++)
954 for (k = 0; k < 2; k++)
955 for (l = 0; l < 6; l++)
956 for (m = 0; m < 6; m++) {
957 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
958 uint8_t *
r =
ref[j][k][l][m];
965 if (
s->s.h.txfmmode ==
i)
970 for (
i = 0;
i < 3;
i++)
973 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
974 for (
i = 0;
i < 7;
i++)
975 for (j = 0; j < 3; j++)
977 s->prob.p.mv_mode[
i][j] =
981 for (
i = 0;
i < 4;
i++)
982 for (j = 0; j < 2; j++)
984 s->prob.p.filter[
i][j] =
987 for (
i = 0;
i < 4;
i++)
991 if (
s->s.h.allowcompinter) {
993 if (
s->s.h.comppredmode)
996 for (
i = 0;
i < 5;
i++)
1005 for (
i = 0;
i < 5;
i++) {
1007 s->prob.p.single_ref[
i][0] =
1010 s->prob.p.single_ref[
i][1] =
1016 for (
i = 0;
i < 5;
i++)
1018 s->prob.p.comp_ref[
i] =
1022 for (
i = 0;
i < 4;
i++)
1023 for (j = 0; j < 9; j++)
1025 s->prob.p.y_mode[
i][j] =
1028 for (
i = 0;
i < 4;
i++)
1029 for (j = 0; j < 4; j++)
1030 for (k = 0; k < 3; k++)
1032 s->prob.p.partition[3 -
i][j][k] =
1034 s->prob.p.partition[3 -
i][j][k]);
1037 for (
i = 0;
i < 3;
i++)
1041 for (
i = 0;
i < 2;
i++) {
1043 s->prob.p.mv_comp[
i].sign =
1046 for (j = 0; j < 10; j++)
1048 s->prob.p.mv_comp[
i].classes[j] =
1052 s->prob.p.mv_comp[
i].class0 =
1055 for (j = 0; j < 10; j++)
1057 s->prob.p.mv_comp[
i].bits[j] =
1061 for (
i = 0;
i < 2;
i++) {
1062 for (j = 0; j < 2; j++)
1063 for (k = 0; k < 3; k++)
1065 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1068 for (j = 0; j < 3; j++)
1070 s->prob.p.mv_comp[
i].fp[j] =
1074 if (
s->s.h.highprecisionmvs) {
1075 for (
i = 0;
i < 2;
i++) {
1077 s->prob.p.mv_comp[
i].class0_hp =
1081 s->prob.p.mv_comp[
i].hp =
1087 return (data2 -
data) + size2;
1091 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1094 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1097 s->prob.p.partition[bl][
c];
1099 ptrdiff_t hbs = 4 >> bl;
1101 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1102 int bytesperpixel =
s->bytesperpixel;
1107 }
else if (col + hbs < s->cols) {
1108 if (row + hbs < s->rows) {
1116 yoff += hbs * 8 * y_stride;
1117 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1122 yoff += hbs * 8 * bytesperpixel;
1123 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1127 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1129 yoff + 8 * hbs * bytesperpixel,
1130 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1131 yoff += hbs * 8 * y_stride;
1132 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1133 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1134 decode_sb(td, row + hbs, col + hbs, lflvl,
1135 yoff + 8 * hbs * bytesperpixel,
1136 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1143 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1145 yoff + 8 * hbs * bytesperpixel,
1146 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1151 }
else if (row + hbs < s->rows) {
1154 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1155 yoff += hbs * 8 * y_stride;
1156 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1157 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1164 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1170 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1174 ptrdiff_t hbs = 4 >> bl;
1176 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1177 int bytesperpixel =
s->bytesperpixel;
1182 }
else if (td->
b->
bl == bl) {
1185 yoff += hbs * 8 * y_stride;
1186 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1188 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1189 yoff += hbs * 8 * bytesperpixel;
1190 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1195 if (col + hbs < s->cols) {
1196 if (row + hbs < s->rows) {
1197 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1198 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1199 yoff += hbs * 8 * y_stride;
1200 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1201 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1203 yoff + 8 * hbs * bytesperpixel,
1204 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1206 yoff += hbs * 8 * bytesperpixel;
1207 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1208 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1210 }
else if (row + hbs < s->rows) {
1211 yoff += hbs * 8 * y_stride;
1212 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1213 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1220 int sb_start = ( idx * n) >> log2_n;
1221 int sb_end = ((idx + 1) * n) >> log2_n;
1222 *start =
FFMIN(sb_start, n) << 3;
1223 *end =
FFMIN(sb_end, n) << 3;
1231 for (
i = 0;
i <
s->active_tile_cols;
i++)
1240 for (
int i = 0;
i < 3;
i++)
1243 for (
i = 0;
i < 8;
i++) {
1262 int row, col, tile_row, tile_col,
ret;
1264 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1266 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1269 ls_y =
f->linesize[0];
1270 ls_uv =
f->linesize[1];
1271 bytesperpixel =
s->bytesperpixel;
1274 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1276 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1278 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1281 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1282 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1289 if (tile_size >
size)
1300 for (row = tile_row_start; row < tile_row_end;
1301 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1303 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1305 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1307 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1312 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1321 td->
c = &td->
c_b[tile_col];
1324 for (col = tile_col_start;
1326 col += 8, yoff2 += 64 * bytesperpixel,
1327 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1331 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1352 if (row + 8 <
s->rows) {
1353 memcpy(
s->intra_pred_data[0],
1354 f->data[0] + yoff + 63 * ls_y,
1355 8 *
s->cols * bytesperpixel);
1356 memcpy(
s->intra_pred_data[1],
1357 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1358 8 *
s->cols * bytesperpixel >>
s->ss_h);
1359 memcpy(
s->intra_pred_data[2],
1360 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1361 8 *
s->cols * bytesperpixel >>
s->ss_h);
1365 if (
s->s.h.filter.level) {
1368 lflvl_ptr =
s->lflvl;
1369 for (col = 0; col <
s->cols;
1370 col += 8, yoff2 += 64 * bytesperpixel,
1371 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1388 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1393 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1394 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1395 unsigned tile_cols_len;
1396 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1401 ls_y =
f->linesize[0];
1402 ls_uv =
f->linesize[1];
1405 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1407 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1408 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1409 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1411 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1413 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1415 td->
c = &td->
c_b[tile_row];
1416 for (row = tile_row_start; row < tile_row_end;
1417 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1418 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1419 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1423 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1432 for (col = tile_col_start;
1434 col += 8, yoff2 += 64 * bytesperpixel,
1435 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1438 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1445 tile_cols_len = tile_col_end - tile_col_start;
1446 if (row + 8 <
s->rows) {
1447 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1448 f->data[0] + yoff + 63 * ls_y,
1449 8 * tile_cols_len * bytesperpixel);
1450 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1451 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1452 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1453 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1454 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1455 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1458 vp9_report_tile_progress(
s, row >> 3, 1);
1468 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1470 int bytesperpixel =
s->bytesperpixel, col,
i;
1474 ls_y =
f->linesize[0];
1475 ls_uv =
f->linesize[1];
1477 for (
i = 0;
i <
s->sb_rows;
i++) {
1478 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1480 if (
s->s.h.filter.level) {
1481 yoff = (ls_y * 64)*
i;
1482 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1483 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1484 for (col = 0; col <
s->cols;
1485 col += 8, yoff += 64 * bytesperpixel,
1486 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1499 unsigned int tile, nb_blocks = 0;
1501 if (
s->s.h.segmentation.enabled) {
1502 for (tile = 0; tile <
s->active_tile_cols; tile++)
1503 nb_blocks +=
s->td[tile].nb_block_structure;
1511 par->
qp =
s->s.h.yac_qi;
1512 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1513 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1514 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1515 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1516 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1519 unsigned int block = 0;
1520 unsigned int tile, block_tile;
1522 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1529 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1536 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1537 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1538 if (
s->s.h.segmentation.absolute_vals)
1539 b->delta_qp -= par->
qp;
1556 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1562 }
else if (
ret == 0) {
1563 if (!
s->s.refs[
ref].f) {
1567 for (
int i = 0;
i < 8;
i++)
1582 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1584 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1591 if (
s->s.h.keyframe)
1605 for (
i = 0;
i < 8;
i++) {
1607 s->s.h.refreshrefmask & (1 <<
i) ?
1626 memset(
s->above_partition_ctx, 0,
s->cols);
1627 memset(
s->above_skip_ctx, 0,
s->cols);
1628 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1629 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1633 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1634 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1635 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1636 memset(
s->above_segpred_ctx, 0,
s->cols);
1641 "Failed to allocate block buffers\n");
1644 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1647 for (
i = 0;
i < 4;
i++) {
1648 for (j = 0; j < 2; j++)
1649 for (k = 0; k < 2; k++)
1650 for (l = 0; l < 6; l++)
1651 for (m = 0; m < 6; m++)
1652 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1653 s->prob.coef[
i][j][k][l][m], 3);
1654 if (
s->s.h.txfmmode ==
i)
1657 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1659 }
else if (!
s->s.h.refreshctx) {
1665 for (
i = 0;
i <
s->sb_rows;
i++)
1671 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1672 s->td[
i].b =
s->td[
i].b_base;
1673 s->td[
i].block =
s->td[
i].block_base;
1674 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1675 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1676 s->td[
i].eob =
s->td[
i].eob_base;
1677 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1678 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1679 s->td[
i].error_info = 0;
1684 int tile_row, tile_col;
1688 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1689 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1692 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1693 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1700 if (tile_size >
size)
1723 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1724 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1725 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1727 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1731 }
while (
s->pass++ == 1);
1733 if (
s->td->error_info < 0) {
1735 s->td->error_info = 0;
1748 for (
int i = 0;
i < 8;
i++)
1751 if (!
s->s.h.invisible) {
1768 for (
i = 0;
i < 3;
i++)
1770 for (
i = 0;
i < 8;
i++)
1783 s->s.h.filter.sharpness = -1;
1801 for (
int i = 0;
i < 3;
i++)
1803 for (
int i = 0;
i < 8;
i++)
1806 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1808 s->s.h.invisible = ssrc->s.h.invisible;
1809 s->s.h.keyframe = ssrc->s.h.keyframe;
1810 s->s.h.intraonly = ssrc->s.h.intraonly;
1811 s->ss_v = ssrc->ss_v;
1812 s->ss_h = ssrc->ss_h;
1813 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1814 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1815 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1816 s->bytesperpixel = ssrc->bytesperpixel;
1817 s->gf_fmt = ssrc->gf_fmt;
1820 s->s.h.bpp = ssrc->s.h.bpp;
1821 s->bpp_index = ssrc->bpp_index;
1822 s->pix_fmt = ssrc->pix_fmt;
1823 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1824 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1825 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1826 sizeof(
s->s.h.segmentation.feat));
1848 .bsfs =
"vp9_superframe_split",
1850 #if CONFIG_VP9_DXVA2_HWACCEL
1853 #if CONFIG_VP9_D3D11VA_HWACCEL
1856 #if CONFIG_VP9_D3D11VA2_HWACCEL
1859 #if CONFIG_VP9_D3D12VA_HWACCEL
1862 #if CONFIG_VP9_NVDEC_HWACCEL
1865 #if CONFIG_VP9_VAAPI_HWACCEL
1868 #if CONFIG_VP9_VDPAU_HWACCEL
1871 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL