24 #include "config_components.h"
/* 3-byte frame sync code that starts every VP9 keyframe/intra-only
 * uncompressed header (see the VP9 bitstream specification). */
#define VP9_SYNCCODE 0x498342
66 for (
i = 0;
i < n;
i++)
104 f->segmentation_map =
NULL;
105 f->hwaccel_picture_private =
NULL;
117 sz = 64 *
s->sb_cols *
s->sb_rows;
118 if (sz !=
s->frame_extradata_pool_size) {
121 if (!
s->frame_extradata_pool) {
122 s->frame_extradata_pool_size = 0;
126 s->frame_extradata_pool_size = sz;
133 memset(
f->extradata->data, 0,
f->extradata->size);
135 f->segmentation_map =
f->extradata->data;
139 &
f->hwaccel_priv_buf);
166 if (
src->hwaccel_picture_private) {
/* Upper bound on the number of hardware-accelerated pixel formats this
 * decoder may offer at once; each CONFIG_* flag is 0 or 1 from the build
 * configuration.  D3D11VA counts twice because it has two variants
 * (hence the "* 2"). */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
191 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
196 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
200 switch (
s->pix_fmt) {
203 #if CONFIG_VP9_DXVA2_HWACCEL
206 #if CONFIG_VP9_D3D11VA_HWACCEL
210 #if CONFIG_VP9_NVDEC_HWACCEL
213 #if CONFIG_VP9_VAAPI_HWACCEL
216 #if CONFIG_VP9_VDPAU_HWACCEL
219 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
224 #if CONFIG_VP9_NVDEC_HWACCEL
227 #if CONFIG_VP9_VAAPI_HWACCEL
230 #if CONFIG_VP9_VDPAU_HWACCEL
237 #if CONFIG_VP9_VAAPI_HWACCEL
244 #if CONFIG_VP9_VAAPI_HWACCEL
250 *fmtp++ =
s->pix_fmt;
258 s->gf_fmt =
s->pix_fmt;
266 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
269 s->last_fmt =
s->pix_fmt;
270 s->sb_cols = (
w + 63) >> 6;
271 s->sb_rows = (
h + 63) >> 6;
272 s->cols = (
w + 7) >> 3;
273 s->rows = (
h + 7) >> 3;
/* Carve a per-column context sub-array out of the flat buffer: point `var`
 * at the current position of the allocation cursor `p` and advance `p` by
 * s->sb_cols * (n) elements of *var.  `p` and `s` are taken from the
 * expansion scope.  do/while(0) fuses the two statements into one so the
 * macro is safe in unbraced if/else bodies. */
#define assign(var, type, n) do { var = (type) p; p += s->sb_cols * (n) * sizeof(*var); } while (0)
280 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
281 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
284 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
285 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
286 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
287 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
288 assign(
s->above_mode_ctx, uint8_t *, 16);
290 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
291 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
292 assign(
s->above_partition_ctx, uint8_t *, 8);
293 assign(
s->above_skip_ctx, uint8_t *, 8);
294 assign(
s->above_txfm_ctx, uint8_t *, 8);
295 assign(
s->above_segpred_ctx, uint8_t *, 8);
296 assign(
s->above_intra_ctx, uint8_t *, 8);
297 assign(
s->above_comp_ctx, uint8_t *, 8);
298 assign(
s->above_ref_ctx, uint8_t *, 8);
299 assign(
s->above_filter_ctx, uint8_t *, 8);
304 for (
i = 0;
i <
s->active_tile_cols;
i++)
308 if (
s->s.h.bpp !=
s->last_bpp) {
311 s->last_bpp =
s->s.h.bpp;
321 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
324 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
328 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
329 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
331 int sbs =
s->sb_cols *
s->sb_rows;
334 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
335 16 * 16 + 2 * chroma_eobs) * sbs);
336 if (!
td->b_base || !
td->block_base)
338 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
339 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
340 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
341 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
342 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
346 if (!
td->block_structure)
350 for (
i = 1;
i <
s->active_tile_cols;
i++)
353 for (
i = 0;
i <
s->active_tile_cols;
i++) {
355 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
356 16 * 16 + 2 * chroma_eobs);
357 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
359 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
360 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
361 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
362 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
363 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
367 if (!
s->td[
i].block_structure)
372 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
389 return m - ((v + 1) >> 1);
396 static const uint8_t inv_map_table[255] = {
397 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
398 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
399 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
400 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
401 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
402 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
403 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
404 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
405 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
406 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
407 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
408 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
409 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
410 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
411 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
412 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
413 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
414 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
462 s->s.h.bpp = 8 +
bits * 2;
463 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
469 s->ss_h =
s->ss_v = 0;
483 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
495 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
506 s->ss_h =
s->ss_v = 1;
507 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
518 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
520 const uint8_t *data2;
544 s->last_keyframe =
s->s.h.keyframe;
547 last_invisible =
s->s.h.invisible;
550 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
552 if (
s->s.h.keyframe) {
560 s->s.h.refreshrefmask = 0xff;
566 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
567 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
568 if (
s->s.h.intraonly) {
577 s->ss_h =
s->ss_v = 1;
580 s->bytesperpixel = 1;
593 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
595 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
597 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
598 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
599 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
600 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
605 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
606 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
608 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
609 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
611 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
612 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
620 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
627 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
628 s->s.h.signbias[0] !=
s->s.h.signbias[2];
629 if (
s->s.h.allowcompinter) {
630 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
631 s->s.h.fixcompref = 2;
632 s->s.h.varcompref[0] = 0;
633 s->s.h.varcompref[1] = 1;
634 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
635 s->s.h.fixcompref = 1;
636 s->s.h.varcompref[0] = 0;
637 s->s.h.varcompref[1] = 2;
639 s->s.h.fixcompref = 0;
640 s->s.h.varcompref[0] = 1;
641 s->s.h.varcompref[1] = 2;
646 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
647 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
649 if (
s->s.h.keyframe ||
s->s.h.intraonly)
650 s->s.h.framectxid = 0;
653 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
655 s->s.h.lf_delta.ref[0] = 1;
656 s->s.h.lf_delta.ref[1] = 0;
657 s->s.h.lf_delta.ref[2] = -1;
658 s->s.h.lf_delta.ref[3] = -1;
659 s->s.h.lf_delta.mode[0] = 0;
660 s->s.h.lf_delta.mode[1] = 0;
661 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
667 if (
s->s.h.filter.sharpness != sharp) {
668 for (
i = 1;
i <= 63;
i++) {
672 limit >>= (sharp + 3) >> 2;
677 s->filter_lut.lim_lut[
i] =
limit;
678 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
681 s->s.h.filter.sharpness = sharp;
682 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
683 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
684 for (
i = 0;
i < 4;
i++)
687 for (
i = 0;
i < 2;
i++)
698 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
699 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
704 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
705 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
706 for (
i = 0;
i < 7;
i++)
709 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
710 for (
i = 0;
i < 3;
i++)
716 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
717 for (
i = 0;
i < 8;
i++) {
718 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
720 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
722 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
723 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
724 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
730 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
731 int qyac, qydc, quvac, quvdc, lflvl, sh;
733 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
734 if (
s->s.h.segmentation.absolute_vals)
739 qyac =
s->s.h.yac_qi;
751 sh =
s->s.h.filter.level >= 32;
752 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
753 if (
s->s.h.segmentation.absolute_vals)
756 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
758 lflvl =
s->s.h.filter.level;
760 if (
s->s.h.lf_delta.enabled) {
761 s->s.h.segmentation.feat[
i].lflvl[0][0] =
762 s->s.h.segmentation.feat[
i].lflvl[0][1] =
764 for (j = 1; j < 4; j++) {
765 s->s.h.segmentation.feat[
i].lflvl[j][0] =
767 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
768 s->s.h.segmentation.feat[
i].lflvl[j][1] =
770 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
773 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
774 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
784 for (
s->s.h.tiling.log2_tile_cols = 0;
785 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
786 s->s.h.tiling.log2_tile_cols++) ;
787 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
789 while (
max >
s->s.h.tiling.log2_tile_cols) {
791 s->s.h.tiling.log2_tile_cols++;
796 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
797 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
802 for (
i = 0;
i <
s->active_tile_cols;
i++)
807 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
809 s->s.h.tiling.tile_cols : 1;
814 n_range_coders =
s->s.h.tiling.tile_cols;
821 for (
i = 0;
i <
s->active_tile_cols;
i++) {
824 rc += n_range_coders;
829 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
830 int valid_ref_frame = 0;
831 for (
i = 0;
i < 3;
i++) {
833 int refw =
ref->width, refh =
ref->height;
837 "Ref pixfmt (%s) did not match current frame (%s)",
841 }
else if (refw ==
w && refh ==
h) {
842 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
846 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
848 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
853 s->mvscale[
i][0] = (refw << 14) /
w;
854 s->mvscale[
i][1] = (refh << 14) /
h;
855 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
856 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
860 if (!valid_ref_frame) {
861 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
866 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
867 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
877 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
884 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
888 if (size2 >
size - (data2 -
data)) {
901 for (
i = 0;
i <
s->active_tile_cols;
i++) {
902 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
903 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
904 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
906 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
908 s->td[
i].nb_block_structure = 0;
914 s->prob.p =
s->prob_ctx[
c].p;
917 if (
s->s.h.lossless) {
921 if (
s->s.h.txfmmode == 3)
925 for (
i = 0;
i < 2;
i++)
928 for (
i = 0;
i < 2;
i++)
929 for (j = 0; j < 2; j++)
931 s->prob.p.tx16p[
i][j] =
933 for (
i = 0;
i < 2;
i++)
934 for (j = 0; j < 3; j++)
936 s->prob.p.tx32p[
i][j] =
942 for (
i = 0;
i < 4;
i++) {
943 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
945 for (j = 0; j < 2; j++)
946 for (k = 0; k < 2; k++)
947 for (l = 0; l < 6; l++)
948 for (m = 0; m < 6; m++) {
949 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
950 uint8_t *
r =
ref[j][k][l][m];
951 if (m >= 3 && l == 0)
953 for (n = 0; n < 3; n++) {
962 for (j = 0; j < 2; j++)
963 for (k = 0; k < 2; k++)
964 for (l = 0; l < 6; l++)
965 for (m = 0; m < 6; m++) {
966 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
967 uint8_t *
r =
ref[j][k][l][m];
974 if (
s->s.h.txfmmode ==
i)
979 for (
i = 0;
i < 3;
i++)
982 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
983 for (
i = 0;
i < 7;
i++)
984 for (j = 0; j < 3; j++)
986 s->prob.p.mv_mode[
i][j] =
990 for (
i = 0;
i < 4;
i++)
991 for (j = 0; j < 2; j++)
993 s->prob.p.filter[
i][j] =
996 for (
i = 0;
i < 4;
i++)
1000 if (
s->s.h.allowcompinter) {
1002 if (
s->s.h.comppredmode)
1005 for (
i = 0;
i < 5;
i++)
1014 for (
i = 0;
i < 5;
i++) {
1016 s->prob.p.single_ref[
i][0] =
1019 s->prob.p.single_ref[
i][1] =
1025 for (
i = 0;
i < 5;
i++)
1027 s->prob.p.comp_ref[
i] =
1031 for (
i = 0;
i < 4;
i++)
1032 for (j = 0; j < 9; j++)
1034 s->prob.p.y_mode[
i][j] =
1037 for (
i = 0;
i < 4;
i++)
1038 for (j = 0; j < 4; j++)
1039 for (k = 0; k < 3; k++)
1041 s->prob.p.partition[3 -
i][j][k] =
1043 s->prob.p.partition[3 -
i][j][k]);
1046 for (
i = 0;
i < 3;
i++)
1050 for (
i = 0;
i < 2;
i++) {
1052 s->prob.p.mv_comp[
i].sign =
1055 for (j = 0; j < 10; j++)
1057 s->prob.p.mv_comp[
i].classes[j] =
1061 s->prob.p.mv_comp[
i].class0 =
1064 for (j = 0; j < 10; j++)
1066 s->prob.p.mv_comp[
i].bits[j] =
1070 for (
i = 0;
i < 2;
i++) {
1071 for (j = 0; j < 2; j++)
1072 for (k = 0; k < 3; k++)
1074 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1077 for (j = 0; j < 3; j++)
1079 s->prob.p.mv_comp[
i].fp[j] =
1083 if (
s->s.h.highprecisionmvs) {
1084 for (
i = 0;
i < 2;
i++) {
1086 s->prob.p.mv_comp[
i].class0_hp =
1090 s->prob.p.mv_comp[
i].hp =
1096 return (data2 -
data) + size2;
1100 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1103 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1104 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1106 s->prob.p.partition[bl][
c];
1108 ptrdiff_t hbs = 4 >> bl;
1110 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1111 int bytesperpixel =
s->bytesperpixel;
1116 }
else if (col + hbs < s->cols) {
1117 if (row + hbs < s->rows) {
1125 yoff += hbs * 8 * y_stride;
1126 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1131 yoff += hbs * 8 * bytesperpixel;
1132 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1136 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1138 yoff + 8 * hbs * bytesperpixel,
1139 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1140 yoff += hbs * 8 * y_stride;
1141 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1142 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1144 yoff + 8 * hbs * bytesperpixel,
1145 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1152 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1154 yoff + 8 * hbs * bytesperpixel,
1155 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1160 }
else if (row + hbs < s->rows) {
1163 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1164 yoff += hbs * 8 * y_stride;
1165 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1166 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1173 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1175 td->counts.partition[bl][
c][bp]++;
1179 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1183 ptrdiff_t hbs = 4 >> bl;
1185 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1186 int bytesperpixel =
s->bytesperpixel;
1191 }
else if (
td->b->bl == bl) {
1194 yoff += hbs * 8 * y_stride;
1195 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1197 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1198 yoff += hbs * 8 * bytesperpixel;
1199 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1204 if (col + hbs < s->cols) {
1205 if (row + hbs < s->rows) {
1206 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1207 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1208 yoff += hbs * 8 * y_stride;
1209 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1212 yoff + 8 * hbs * bytesperpixel,
1213 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1215 yoff += hbs * 8 * bytesperpixel;
1216 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1219 }
else if (row + hbs < s->rows) {
1220 yoff += hbs * 8 * y_stride;
1221 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1229 int sb_start = ( idx * n) >> log2_n;
1230 int sb_end = ((idx + 1) * n) >> log2_n;
1231 *start =
FFMIN(sb_start, n) << 3;
1232 *end =
FFMIN(sb_end, n) << 3;
1240 for (
i = 0;
i <
s->active_tile_cols;
i++)
1249 for (
i = 0;
i < 3;
i++) {
1254 for (
i = 0;
i < 8;
i++) {
1275 int row, col, tile_row, tile_col,
ret;
1277 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1279 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1282 ls_y =
f->linesize[0];
1283 ls_uv =
f->linesize[1];
1284 bytesperpixel =
s->bytesperpixel;
1287 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1289 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1291 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1294 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1295 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1302 if (tile_size >
size)
1313 for (row = tile_row_start; row < tile_row_end;
1314 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1316 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1318 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1320 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1321 td->tile_col_start = tile_col_start;
1323 memset(
td->left_partition_ctx, 0, 8);
1324 memset(
td->left_skip_ctx, 0, 8);
1325 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1330 memset(
td->left_y_nnz_ctx, 0, 16);
1331 memset(
td->left_uv_nnz_ctx, 0, 32);
1332 memset(
td->left_segpred_ctx, 0, 8);
1334 td->c = &
td->c_b[tile_col];
1337 for (col = tile_col_start;
1339 col += 8, yoff2 += 64 * bytesperpixel,
1340 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1344 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1365 if (row + 8 <
s->rows) {
1366 memcpy(
s->intra_pred_data[0],
1367 f->data[0] + yoff + 63 * ls_y,
1368 8 *
s->cols * bytesperpixel);
1369 memcpy(
s->intra_pred_data[1],
1370 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1371 8 *
s->cols * bytesperpixel >>
s->ss_h);
1372 memcpy(
s->intra_pred_data[2],
1373 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1374 8 *
s->cols * bytesperpixel >>
s->ss_h);
1378 if (
s->s.h.filter.level) {
1381 lflvl_ptr =
s->lflvl;
1382 for (col = 0; col <
s->cols;
1383 col += 8, yoff2 += 64 * bytesperpixel,
1384 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1401 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1406 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1407 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1408 unsigned tile_cols_len;
1409 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1414 ls_y =
f->linesize[0];
1415 ls_uv =
f->linesize[1];
1418 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1419 td->tile_col_start = tile_col_start;
1420 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1421 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1422 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1424 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1426 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1428 td->c = &
td->c_b[tile_row];
1429 for (row = tile_row_start; row < tile_row_end;
1430 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1431 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1432 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1434 memset(
td->left_partition_ctx, 0, 8);
1435 memset(
td->left_skip_ctx, 0, 8);
1436 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1441 memset(
td->left_y_nnz_ctx, 0, 16);
1442 memset(
td->left_uv_nnz_ctx, 0, 32);
1443 memset(
td->left_segpred_ctx, 0, 8);
1445 for (col = tile_col_start;
1447 col += 8, yoff2 += 64 * bytesperpixel,
1448 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1451 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1458 tile_cols_len = tile_col_end - tile_col_start;
1459 if (row + 8 <
s->rows) {
1460 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1461 f->data[0] + yoff + 63 * ls_y,
1462 8 * tile_cols_len * bytesperpixel);
1463 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1464 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1465 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1466 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1467 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1468 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1471 vp9_report_tile_progress(
s, row >> 3, 1);
1481 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1483 int bytesperpixel =
s->bytesperpixel, col,
i;
1487 ls_y =
f->linesize[0];
1488 ls_uv =
f->linesize[1];
1490 for (
i = 0;
i <
s->sb_rows;
i++) {
1491 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1493 if (
s->s.h.filter.level) {
1494 yoff = (ls_y * 64)*
i;
1495 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1496 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1497 for (col = 0; col <
s->cols;
1498 col += 8, yoff += 64 * bytesperpixel,
1499 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1512 unsigned int tile, nb_blocks = 0;
1514 if (
s->s.h.segmentation.enabled) {
1515 for (tile = 0; tile <
s->active_tile_cols; tile++)
1516 nb_blocks +=
s->td[tile].nb_block_structure;
1524 par->
qp =
s->s.h.yac_qi;
1525 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1526 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1527 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1528 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1529 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1532 unsigned int block = 0;
1533 unsigned int tile, block_tile;
1535 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1538 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1540 unsigned int row =
td->block_structure[block_tile].row;
1541 unsigned int col =
td->block_structure[block_tile].col;
1542 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1546 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1547 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1549 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1550 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1551 if (
s->s.h.segmentation.absolute_vals)
1552 b->delta_qp -= par->
qp;
1569 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1574 }
else if (
ret == 0) {
1575 if (!
s->s.refs[
ref].f->buf[0]) {
1583 for (
i = 0;
i < 8;
i++) {
1584 if (
s->next_refs[
i].f->buf[0])
1586 if (
s->s.refs[
i].f->buf[0] &&
1596 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1599 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1605 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1613 if (
s->s.h.keyframe)
1626 for (
i = 0;
i < 8;
i++) {
1627 if (
s->next_refs[
i].f->buf[0])
1629 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1631 }
else if (
s->s.refs[
i].f->buf[0]) {
1653 memset(
s->above_partition_ctx, 0,
s->cols);
1654 memset(
s->above_skip_ctx, 0,
s->cols);
1655 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1656 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1660 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1661 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1662 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1663 memset(
s->above_segpred_ctx, 0,
s->cols);
1668 "Failed to allocate block buffers\n");
1671 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1674 for (
i = 0;
i < 4;
i++) {
1675 for (j = 0; j < 2; j++)
1676 for (k = 0; k < 2; k++)
1677 for (l = 0; l < 6; l++)
1678 for (m = 0; m < 6; m++)
1679 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1680 s->prob.coef[
i][j][k][l][m], 3);
1681 if (
s->s.h.txfmmode ==
i)
1684 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1686 }
else if (!
s->s.h.refreshctx) {
1692 for (
i = 0;
i <
s->sb_rows;
i++)
1698 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1699 s->td[
i].b =
s->td[
i].b_base;
1700 s->td[
i].block =
s->td[
i].block_base;
1701 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1702 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1703 s->td[
i].eob =
s->td[
i].eob_base;
1704 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1705 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1706 s->td[
i].error_info = 0;
1711 int tile_row, tile_col;
1715 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1716 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1719 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1720 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1727 if (tile_size >
size)
1752 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1753 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1754 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1756 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1760 }
while (
s->pass++ == 1);
1763 if (
s->td->error_info < 0) {
1765 s->td->error_info = 0;
1776 for (
i = 0;
i < 8;
i++) {
1777 if (
s->s.refs[
i].f->buf[0])
1779 if (
s->next_refs[
i].f->buf[0] &&
1784 if (!
s->s.h.invisible) {
1798 for (
i = 0;
i < 3;
i++)
1800 for (
i = 0;
i < 8;
i++)
1813 s->s.h.filter.sharpness = -1;
1823 for (
int i = 0;
i < 3;
i++) {
1825 if (!
s->s.frames[
i].tf.f)
1828 for (
int i = 0;
i < 8;
i++) {
1831 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1843 for (
i = 0;
i < 3;
i++) {
1844 if (
s->s.frames[
i].tf.f->buf[0])
1846 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1851 for (
i = 0;
i < 8;
i++) {
1852 if (
s->s.refs[
i].f->buf[0])
1854 if (ssrc->next_refs[
i].f->buf[0]) {
1860 s->s.h.invisible = ssrc->s.h.invisible;
1861 s->s.h.keyframe = ssrc->s.h.keyframe;
1862 s->s.h.intraonly = ssrc->s.h.intraonly;
1863 s->ss_v = ssrc->ss_v;
1864 s->ss_h = ssrc->ss_h;
1865 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1866 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1867 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1868 s->bytesperpixel = ssrc->bytesperpixel;
1869 s->gf_fmt = ssrc->gf_fmt;
1872 s->s.h.bpp = ssrc->s.h.bpp;
1873 s->bpp_index = ssrc->bpp_index;
1874 s->pix_fmt = ssrc->pix_fmt;
1875 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1876 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1877 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1878 sizeof(
s->s.h.segmentation.feat));
1900 .bsfs =
"vp9_superframe_split",
1902 #if CONFIG_VP9_DXVA2_HWACCEL
1905 #if CONFIG_VP9_D3D11VA_HWACCEL
1908 #if CONFIG_VP9_D3D11VA2_HWACCEL
1911 #if CONFIG_VP9_NVDEC_HWACCEL
1914 #if CONFIG_VP9_VAAPI_HWACCEL
1917 #if CONFIG_VP9_VDPAU_HWACCEL
1920 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL