24 #include "config_components.h"
50 #define VP9_SYNCCODE 0x498342
104 f->segmentation_map =
NULL;
116 sz = 64 *
s->sb_cols *
s->sb_rows;
117 if (sz !=
s->frame_extradata_pool_size) {
121 if (!
s->frame_extradata_pool) {
122 s->frame_extradata_pool_size = 0;
126 s->frame_extradata_pool_size = sz;
134 f->segmentation_map =
f->extradata;
151 dst->frame_header =
src->frame_header;
157 dst->segmentation_map =
src->segmentation_map;
159 dst->uses_2pass =
src->uses_2pass;
162 src->hwaccel_picture_private);
167 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
168 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
169 CONFIG_VP9_D3D12VA_HWACCEL + \
170 CONFIG_VP9_NVDEC_HWACCEL + \
171 CONFIG_VP9_VAAPI_HWACCEL + \
172 CONFIG_VP9_VDPAU_HWACCEL + \
173 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
174 CONFIG_VP9_VULKAN_HWACCEL)
178 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
184 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
189 switch (
s->pix_fmt) {
192 #if CONFIG_VP9_DXVA2_HWACCEL
195 #if CONFIG_VP9_D3D11VA_HWACCEL
199 #if CONFIG_VP9_D3D12VA_HWACCEL
202 #if CONFIG_VP9_NVDEC_HWACCEL
205 #if CONFIG_VP9_VAAPI_HWACCEL
208 #if CONFIG_VP9_VDPAU_HWACCEL
211 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
214 #if CONFIG_VP9_VULKAN_HWACCEL
219 #if CONFIG_VP9_NVDEC_HWACCEL
222 #if CONFIG_VP9_VAAPI_HWACCEL
225 #if CONFIG_VP9_VDPAU_HWACCEL
228 #if CONFIG_VP9_VULKAN_HWACCEL
235 #if CONFIG_VP9_VAAPI_HWACCEL
238 #if CONFIG_VP9_VULKAN_HWACCEL
245 #if CONFIG_VP9_VAAPI_HWACCEL
248 #if CONFIG_VP9_VULKAN_HWACCEL
254 *fmtp++ =
s->pix_fmt;
264 s->gf_fmt =
s->pix_fmt;
272 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
275 s->last_fmt =
s->pix_fmt;
276 s->sb_cols = (
w + 63) >> 6;
277 s->sb_rows = (
h + 63) >> 6;
278 s->cols = (
w + 7) >> 3;
279 s->rows = (
h + 7) >> 3;
282 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
286 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
287 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
290 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
291 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
292 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
293 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
294 assign(
s->above_mode_ctx, uint8_t *, 16);
296 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
297 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
298 assign(
s->above_partition_ctx, uint8_t *, 8);
299 assign(
s->above_skip_ctx, uint8_t *, 8);
300 assign(
s->above_txfm_ctx, uint8_t *, 8);
301 assign(
s->above_segpred_ctx, uint8_t *, 8);
302 assign(
s->above_intra_ctx, uint8_t *, 8);
303 assign(
s->above_comp_ctx, uint8_t *, 8);
304 assign(
s->above_ref_ctx, uint8_t *, 8);
305 assign(
s->above_filter_ctx, uint8_t *, 8);
310 for (
i = 0;
i <
s->active_tile_cols;
i++)
314 if (
s->s.h.bpp !=
s->last_bpp) {
317 s->last_bpp =
s->s.h.bpp;
328 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
335 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
336 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
338 int sbs =
s->sb_cols *
s->sb_rows;
342 16 * 16 + 2 * chroma_eobs) * sbs);
357 for (
i = 1;
i <
s->active_tile_cols;
i++)
360 for (
i = 0;
i <
s->active_tile_cols;
i++) {
362 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
363 16 * 16 + 2 * chroma_eobs);
364 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
366 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
367 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
368 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
369 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
370 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
374 if (!
s->td[
i].block_structure)
379 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
396 return m - ((v + 1) >> 1);
403 static const uint8_t inv_map_table[255] = {
404 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
405 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
406 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
407 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
408 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
409 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
410 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
411 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
412 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
413 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
414 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
415 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
416 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
417 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
418 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
419 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
420 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
421 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
469 s->s.h.bpp = 8 +
bits * 2;
470 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
476 s->ss_h =
s->ss_v = 0;
490 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
502 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
513 s->ss_h =
s->ss_v = 1;
514 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
525 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
527 const uint8_t *data2;
552 s->last_keyframe =
s->s.h.keyframe;
555 last_invisible =
s->s.h.invisible;
558 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
560 if (
s->s.h.keyframe) {
568 s->s.h.refreshrefmask = 0xff;
574 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
575 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
576 if (
s->s.h.intraonly) {
585 s->ss_h =
s->ss_v = 1;
588 s->bytesperpixel = 1;
601 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
603 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
605 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
606 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
607 !
s->s.refs[
s->s.h.refidx[1]].f ||
608 !
s->s.refs[
s->s.h.refidx[2]].f) {
613 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
614 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
616 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
617 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
619 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
620 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
628 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
636 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
637 s->s.h.signbias[0] !=
s->s.h.signbias[2];
638 if (
s->s.h.allowcompinter) {
639 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
640 s->s.h.fixcompref = 2;
641 s->s.h.varcompref[0] = 0;
642 s->s.h.varcompref[1] = 1;
643 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
644 s->s.h.fixcompref = 1;
645 s->s.h.varcompref[0] = 0;
646 s->s.h.varcompref[1] = 2;
648 s->s.h.fixcompref = 0;
649 s->s.h.varcompref[0] = 1;
650 s->s.h.varcompref[1] = 2;
655 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
656 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
658 if (
s->s.h.keyframe ||
s->s.h.intraonly)
659 s->s.h.framectxid = 0;
662 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
664 s->s.h.lf_delta.ref[0] = 1;
665 s->s.h.lf_delta.ref[1] = 0;
666 s->s.h.lf_delta.ref[2] = -1;
667 s->s.h.lf_delta.ref[3] = -1;
668 s->s.h.lf_delta.mode[0] = 0;
669 s->s.h.lf_delta.mode[1] = 0;
670 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
676 if (
s->s.h.filter.sharpness != sharp) {
677 for (
i = 1;
i <= 63;
i++) {
681 limit >>= (sharp + 3) >> 2;
686 s->filter_lut.lim_lut[
i] =
limit;
687 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
690 s->s.h.filter.sharpness = sharp;
691 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
692 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
693 for (
i = 0;
i < 4;
i++)
696 for (
i = 0;
i < 2;
i++)
707 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
708 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
709 #if FF_API_CODEC_PROPS
717 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
718 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
719 for (
i = 0;
i < 7;
i++)
722 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
723 for (
i = 0;
i < 3;
i++)
729 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
730 for (
i = 0;
i < 8;
i++) {
731 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
733 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
735 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
736 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
737 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
744 s->s.h.segmentation.temporal = 0;
745 s->s.h.segmentation.update_map = 0;
749 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
750 int qyac, qydc, quvac, quvdc, lflvl, sh;
752 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
753 if (
s->s.h.segmentation.absolute_vals)
758 qyac =
s->s.h.yac_qi;
770 sh =
s->s.h.filter.level >= 32;
771 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
772 if (
s->s.h.segmentation.absolute_vals)
775 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
777 lflvl =
s->s.h.filter.level;
779 if (
s->s.h.lf_delta.enabled) {
780 s->s.h.segmentation.feat[
i].lflvl[0][0] =
781 s->s.h.segmentation.feat[
i].lflvl[0][1] =
783 for (j = 1; j < 4; j++) {
784 s->s.h.segmentation.feat[
i].lflvl[j][0] =
786 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
787 s->s.h.segmentation.feat[
i].lflvl[j][1] =
789 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
792 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
793 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
803 for (
s->s.h.tiling.log2_tile_cols = 0;
804 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
805 s->s.h.tiling.log2_tile_cols++) ;
806 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
808 while (
max >
s->s.h.tiling.log2_tile_cols) {
810 s->s.h.tiling.log2_tile_cols++;
815 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
816 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols) || changed) {
821 for (
i = 0;
i <
s->active_tile_cols;
i++)
826 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
828 s->s.h.tiling.tile_cols : 1;
833 n_range_coders =
s->s.h.tiling.tile_cols;
840 for (
i = 0;
i <
s->active_tile_cols;
i++) {
843 rc += n_range_coders;
848 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
849 int valid_ref_frame = 0;
850 for (
i = 0;
i < 3;
i++) {
852 int refw =
ref->width, refh =
ref->height;
856 "Ref pixfmt (%s) did not match current frame (%s)",
860 }
else if (refw ==
w && refh ==
h) {
861 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
865 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
867 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
872 s->mvscale[
i][0] = (refw << 14) /
w;
873 s->mvscale[
i][1] = (refh << 14) /
h;
874 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
875 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
879 if (!valid_ref_frame) {
880 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
885 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
886 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
896 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
903 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
907 if (size2 >
size - (data2 -
data)) {
920 for (
i = 0;
i <
s->active_tile_cols;
i++) {
921 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
922 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
923 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
925 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
927 s->td[
i].nb_block_structure = 0;
933 s->prob.p =
s->prob_ctx[
c].p;
936 if (
s->s.h.lossless) {
940 if (
s->s.h.txfmmode == 3)
944 for (
i = 0;
i < 2;
i++)
947 for (
i = 0;
i < 2;
i++)
948 for (j = 0; j < 2; j++)
950 s->prob.p.tx16p[
i][j] =
952 for (
i = 0;
i < 2;
i++)
953 for (j = 0; j < 3; j++)
955 s->prob.p.tx32p[
i][j] =
961 for (
i = 0;
i < 4;
i++) {
962 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
964 for (j = 0; j < 2; j++)
965 for (k = 0; k < 2; k++)
966 for (l = 0; l < 6; l++)
967 for (m = 0; m < 6; m++) {
968 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
969 uint8_t *
r =
ref[j][k][l][m];
970 if (m >= 3 && l == 0)
972 for (n = 0; n < 3; n++) {
981 for (j = 0; j < 2; j++)
982 for (k = 0; k < 2; k++)
983 for (l = 0; l < 6; l++)
984 for (m = 0; m < 6; m++) {
985 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
986 uint8_t *
r =
ref[j][k][l][m];
993 if (
s->s.h.txfmmode ==
i)
998 for (
i = 0;
i < 3;
i++)
1001 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
1002 for (
i = 0;
i < 7;
i++)
1003 for (j = 0; j < 3; j++)
1005 s->prob.p.mv_mode[
i][j] =
1009 for (
i = 0;
i < 4;
i++)
1010 for (j = 0; j < 2; j++)
1012 s->prob.p.filter[
i][j] =
1015 for (
i = 0;
i < 4;
i++)
1019 if (
s->s.h.allowcompinter) {
1021 if (
s->s.h.comppredmode)
1024 for (
i = 0;
i < 5;
i++)
1033 for (
i = 0;
i < 5;
i++) {
1035 s->prob.p.single_ref[
i][0] =
1038 s->prob.p.single_ref[
i][1] =
1044 for (
i = 0;
i < 5;
i++)
1046 s->prob.p.comp_ref[
i] =
1050 for (
i = 0;
i < 4;
i++)
1051 for (j = 0; j < 9; j++)
1053 s->prob.p.y_mode[
i][j] =
1056 for (
i = 0;
i < 4;
i++)
1057 for (j = 0; j < 4; j++)
1058 for (k = 0; k < 3; k++)
1060 s->prob.p.partition[3 -
i][j][k] =
1062 s->prob.p.partition[3 -
i][j][k]);
1065 for (
i = 0;
i < 3;
i++)
1069 for (
i = 0;
i < 2;
i++) {
1071 s->prob.p.mv_comp[
i].sign =
1074 for (j = 0; j < 10; j++)
1076 s->prob.p.mv_comp[
i].classes[j] =
1080 s->prob.p.mv_comp[
i].class0 =
1083 for (j = 0; j < 10; j++)
1085 s->prob.p.mv_comp[
i].bits[j] =
1089 for (
i = 0;
i < 2;
i++) {
1090 for (j = 0; j < 2; j++)
1091 for (k = 0; k < 3; k++)
1093 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1096 for (j = 0; j < 3; j++)
1098 s->prob.p.mv_comp[
i].fp[j] =
1102 if (
s->s.h.highprecisionmvs) {
1103 for (
i = 0;
i < 2;
i++) {
1105 s->prob.p.mv_comp[
i].class0_hp =
1109 s->prob.p.mv_comp[
i].hp =
1115 return (data2 -
data) + size2;
1119 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1122 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1125 s->prob.p.partition[bl][
c];
1127 ptrdiff_t hbs = 4 >> bl;
1129 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1130 int bytesperpixel =
s->bytesperpixel;
1135 }
else if (col + hbs < s->cols) {
1136 if (row + hbs < s->rows) {
1144 yoff += hbs * 8 * y_stride;
1145 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1150 yoff += hbs * 8 * bytesperpixel;
1151 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1155 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1157 yoff + 8 * hbs * bytesperpixel,
1158 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1159 yoff += hbs * 8 * y_stride;
1160 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1161 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1162 decode_sb(td, row + hbs, col + hbs, lflvl,
1163 yoff + 8 * hbs * bytesperpixel,
1164 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1168 "the four PARTITION_* terminal codes");
1172 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1174 yoff + 8 * hbs * bytesperpixel,
1175 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1180 }
else if (row + hbs < s->rows) {
1183 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1184 yoff += hbs * 8 * y_stride;
1185 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1186 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1193 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1199 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1203 ptrdiff_t hbs = 4 >> bl;
1205 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1206 int bytesperpixel =
s->bytesperpixel;
1211 }
else if (td->
b->
bl == bl) {
1214 yoff += hbs * 8 * y_stride;
1215 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1217 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1218 yoff += hbs * 8 * bytesperpixel;
1219 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1224 if (col + hbs < s->cols) {
1225 if (row + hbs < s->rows) {
1226 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1227 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1228 yoff += hbs * 8 * y_stride;
1229 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1230 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1232 yoff + 8 * hbs * bytesperpixel,
1233 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1235 yoff += hbs * 8 * bytesperpixel;
1236 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1237 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1239 }
else if (row + hbs < s->rows) {
1240 yoff += hbs * 8 * y_stride;
1241 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1242 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1249 int sb_start = ( idx * n) >> log2_n;
1250 int sb_end = ((idx + 1) * n) >> log2_n;
1251 *start =
FFMIN(sb_start, n) << 3;
1252 *end =
FFMIN(sb_end, n) << 3;
1260 for (
i = 0;
i <
s->active_tile_cols;
i++)
1269 for (
int i = 0;
i < 3;
i++)
1272 for (
i = 0;
i < 8;
i++) {
1285 ff_cbs_fragment_free(&
s->current_frag);
1286 ff_cbs_close(&
s->cbc);
1297 int row, col, tile_row, tile_col,
ret;
1299 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1301 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1304 ls_y =
f->linesize[0];
1305 ls_uv =
f->linesize[1];
1306 bytesperpixel =
s->bytesperpixel;
1309 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1311 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1313 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1316 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1317 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1324 if (tile_size >
size)
1335 for (row = tile_row_start; row < tile_row_end;
1336 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1338 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1340 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1342 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1347 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1356 td->
c = &td->
c_b[tile_col];
1359 for (col = tile_col_start;
1361 col += 8, yoff2 += 64 * bytesperpixel,
1362 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1366 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1387 if (row + 8 <
s->rows) {
1388 memcpy(
s->intra_pred_data[0],
1389 f->data[0] + yoff + 63 * ls_y,
1390 8 *
s->cols * bytesperpixel);
1391 memcpy(
s->intra_pred_data[1],
1392 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1393 8 *
s->cols * bytesperpixel >>
s->ss_h);
1394 memcpy(
s->intra_pred_data[2],
1395 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1396 8 *
s->cols * bytesperpixel >>
s->ss_h);
1400 if (
s->s.h.filter.level) {
1403 lflvl_ptr =
s->lflvl;
1404 for (col = 0; col <
s->cols;
1405 col += 8, yoff2 += 64 * bytesperpixel,
1406 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1423 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1428 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1429 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1430 unsigned tile_cols_len;
1431 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1436 ls_y =
f->linesize[0];
1437 ls_uv =
f->linesize[1];
1440 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1442 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1443 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1444 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1446 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1448 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1450 td->
c = &td->
c_b[tile_row];
1451 for (row = tile_row_start; row < tile_row_end;
1452 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1453 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1454 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1458 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1467 for (col = tile_col_start;
1469 col += 8, yoff2 += 64 * bytesperpixel,
1470 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1473 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1480 tile_cols_len = tile_col_end - tile_col_start;
1481 if (row + 8 <
s->rows) {
1482 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1483 f->data[0] + yoff + 63 * ls_y,
1484 8 * tile_cols_len * bytesperpixel);
1485 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1486 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1487 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1488 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1489 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1490 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1493 vp9_report_tile_progress(
s, row >> 3, 1);
1503 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1505 int bytesperpixel =
s->bytesperpixel, col,
i;
1509 ls_y =
f->linesize[0];
1510 ls_uv =
f->linesize[1];
1512 for (
i = 0;
i <
s->sb_rows;
i++) {
1513 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1515 if (
s->s.h.filter.level) {
1516 yoff = (ls_y * 64)*
i;
1517 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1518 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1519 for (col = 0; col <
s->cols;
1520 col += 8, yoff += 64 * bytesperpixel,
1521 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1534 unsigned int tile, nb_blocks = 0;
1536 if (
s->s.h.segmentation.enabled) {
1538 nb_blocks +=
s->td[
tile].nb_block_structure;
1546 par->
qp =
s->s.h.yac_qi;
1547 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1548 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1549 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1550 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1551 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1554 unsigned int block = 0;
1555 unsigned int tile, block_tile;
1564 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1571 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1572 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1573 if (
s->s.h.segmentation.absolute_vals)
1574 b->delta_qp -= par->
qp;
1594 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1598 ret = ff_cbs_read_packet(
s->cbc, &
s->current_frag,
pkt);
1600 ff_cbs_fragment_reset(&
s->current_frag);
1605 unit = &
s->current_frag.units[0];
1609 s->frame_header = &rf->
header;
1612 ff_cbs_fragment_reset(&
s->current_frag);
1614 }
else if (
ret == 0) {
1615 if (!
s->s.refs[
ref].f) {
1617 ff_cbs_fragment_reset(&
s->current_frag);
1620 for (
int i = 0;
i < 8;
i++)
1624 ff_cbs_fragment_reset(&
s->current_frag);
1636 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1638 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1643 ff_cbs_fragment_reset(&
s->current_frag);
1648 s->s.frames[
CUR_FRAME].frame_header =
s->frame_header;
1651 if (
s->s.h.keyframe)
1655 if (
s->s.h.lossless)
1669 for (
i = 0;
i < 8;
i++) {
1671 s->s.h.refreshrefmask & (1 <<
i) ?
1687 for (
i = 0;
i < 8;
i++) {
1689 s->s.h.refreshrefmask & (1 <<
i) ?
1697 memset(
s->above_partition_ctx, 0,
s->cols);
1698 memset(
s->above_skip_ctx, 0,
s->cols);
1699 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1700 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1704 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1705 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1706 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1707 memset(
s->above_segpred_ctx, 0,
s->cols);
1712 "Failed to allocate block buffers\n");
1715 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1718 for (
i = 0;
i < 4;
i++) {
1719 for (j = 0; j < 2; j++)
1720 for (k = 0; k < 2; k++)
1721 for (l = 0; l < 6; l++)
1722 for (m = 0; m < 6; m++)
1723 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1724 s->prob.coef[
i][j][k][l][m], 3);
1725 if (
s->s.h.txfmmode ==
i)
1728 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1730 }
else if (!
s->s.h.refreshctx) {
1736 for (
i = 0;
i <
s->sb_rows;
i++)
1742 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1743 s->td[
i].b =
s->td[
i].b_base;
1744 s->td[
i].block =
s->td[
i].block_base;
1745 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1746 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1747 s->td[
i].eob =
s->td[
i].eob_base;
1748 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1749 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1750 s->td[
i].error_info = 0;
1755 int tile_row, tile_col;
1759 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1760 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1763 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1764 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1771 if (tile_size >
size)
1794 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1795 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1796 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1798 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1802 }
while (
s->pass++ == 1);
1804 if (
s->td->error_info < 0) {
1806 s->td->error_info = 0;
1817 ff_cbs_fragment_reset(&
s->current_frag);
1821 for (
int i = 0;
i < 8;
i++)
1824 if (!
s->s.h.invisible) {
1832 ff_cbs_fragment_reset(&
s->current_frag);
1842 for (
i = 0;
i < 3;
i++)
1845 for (
i = 0;
i < 8;
i++) {
1850 ff_cbs_fragment_reset(&
s->current_frag);
1851 ff_cbs_flush(
s->cbc);
1863 s->s.h.filter.sharpness = -1;
1885 for (
int i = 0;
i < 3;
i++)
1887 for (
int i = 0;
i < 8;
i++)
1890 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1893 for (
int i = 0;
i < 8;
i++)
1896 s->frame_header = ssrc->frame_header;
1899 s->s.h.invisible = ssrc->s.h.invisible;
1900 s->s.h.keyframe = ssrc->s.h.keyframe;
1901 s->s.h.intraonly = ssrc->s.h.intraonly;
1902 s->ss_v = ssrc->ss_v;
1903 s->ss_h = ssrc->ss_h;
1904 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1905 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1906 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1907 s->bytesperpixel = ssrc->bytesperpixel;
1908 s->gf_fmt = ssrc->gf_fmt;
1911 s->s.h.bpp = ssrc->s.h.bpp;
1912 s->bpp_index = ssrc->bpp_index;
1913 s->pix_fmt = ssrc->pix_fmt;
1914 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1915 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1916 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1917 sizeof(
s->s.h.segmentation.feat));
1939 .bsfs =
"vp9_superframe_split",
1941 #if CONFIG_VP9_DXVA2_HWACCEL
1944 #if CONFIG_VP9_D3D11VA_HWACCEL
1947 #if CONFIG_VP9_D3D11VA2_HWACCEL
1950 #if CONFIG_VP9_D3D12VA_HWACCEL
1953 #if CONFIG_VP9_NVDEC_HWACCEL
1956 #if CONFIG_VP9_VAAPI_HWACCEL
1959 #if CONFIG_VP9_VDPAU_HWACCEL
1962 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
1965 #if CONFIG_VP9_VULKAN_HWACCEL