24 #include "config_components.h"
/* 24-bit sync code expected in every VP9 uncompressed frame header. */
#define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
105 f->segmentation_map =
NULL;
117 sz = 64 *
s->sb_cols *
s->sb_rows;
118 if (sz !=
s->frame_extradata_pool_size) {
122 if (!
s->frame_extradata_pool) {
123 s->frame_extradata_pool_size = 0;
127 s->frame_extradata_pool_size = sz;
135 f->segmentation_map =
f->extradata;
164 src->hwaccel_picture_private);
/*
 * Upper bound on the number of hwaccel pixel formats the decoder may offer;
 * each CONFIG_* macro is 0 or 1 depending on build configuration.
 * NOTE(review): D3D11VA is counted twice — presumably it contributes two
 * entries to the format list; confirm against the fmtp population code.
 */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_D3D12VA_HWACCEL + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
181 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
186 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
190 switch (
s->pix_fmt) {
193 #if CONFIG_VP9_DXVA2_HWACCEL
196 #if CONFIG_VP9_D3D11VA_HWACCEL
200 #if CONFIG_VP9_D3D12VA_HWACCEL
203 #if CONFIG_VP9_NVDEC_HWACCEL
206 #if CONFIG_VP9_VAAPI_HWACCEL
209 #if CONFIG_VP9_VDPAU_HWACCEL
212 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
217 #if CONFIG_VP9_NVDEC_HWACCEL
220 #if CONFIG_VP9_VAAPI_HWACCEL
223 #if CONFIG_VP9_VDPAU_HWACCEL
230 #if CONFIG_VP9_VAAPI_HWACCEL
237 #if CONFIG_VP9_VAAPI_HWACCEL
243 *fmtp++ =
s->pix_fmt;
251 s->gf_fmt =
s->pix_fmt;
259 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
262 s->last_fmt =
s->pix_fmt;
263 s->sb_cols = (
w + 63) >> 6;
264 s->sb_rows = (
h + 63) >> 6;
265 s->cols = (
w + 7) >> 3;
266 s->rows = (
h + 7) >> 3;
/*
 * Carve out a sub-buffer of (n) elements per superblock column for 'var'
 * from the arena pointed to by the local 'p', then advance 'p' past it.
 * Relies on locals 'p' (byte-addressable arena cursor) and 's' (for
 * s->sb_cols) being in scope at the expansion site.
 *
 * Wrapped in do { } while (0) so the two statements behave as one under an
 * unbraced if/else (CERT PRE10-C); previously a stray second statement
 * could escape the conditional.
 */
#define assign(var, type, n) do { var = (type) p; p += s->sb_cols * (n) * sizeof(*var); } while (0)
273 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
274 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
277 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
278 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
279 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
280 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
281 assign(
s->above_mode_ctx, uint8_t *, 16);
283 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
284 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
285 assign(
s->above_partition_ctx, uint8_t *, 8);
286 assign(
s->above_skip_ctx, uint8_t *, 8);
287 assign(
s->above_txfm_ctx, uint8_t *, 8);
288 assign(
s->above_segpred_ctx, uint8_t *, 8);
289 assign(
s->above_intra_ctx, uint8_t *, 8);
290 assign(
s->above_comp_ctx, uint8_t *, 8);
291 assign(
s->above_ref_ctx, uint8_t *, 8);
292 assign(
s->above_filter_ctx, uint8_t *, 8);
297 for (
i = 0;
i <
s->active_tile_cols;
i++)
301 if (
s->s.h.bpp !=
s->last_bpp) {
304 s->last_bpp =
s->s.h.bpp;
314 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
317 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
321 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
322 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
324 int sbs =
s->sb_cols *
s->sb_rows;
327 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
328 16 * 16 + 2 * chroma_eobs) * sbs);
329 if (!
td->b_base || !
td->block_base)
331 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
332 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
333 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
334 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
335 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
339 if (!
td->block_structure)
343 for (
i = 1;
i <
s->active_tile_cols;
i++)
346 for (
i = 0;
i <
s->active_tile_cols;
i++) {
348 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
349 16 * 16 + 2 * chroma_eobs);
350 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
352 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
353 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
354 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
355 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
356 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
360 if (!
s->td[
i].block_structure)
365 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
382 return m - ((v + 1) >> 1);
389 static const uint8_t inv_map_table[255] = {
390 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
391 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
392 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
393 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
394 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
395 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
396 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
397 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
398 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
399 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
400 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
401 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
402 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
403 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
404 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
405 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
406 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
407 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
455 s->s.h.bpp = 8 +
bits * 2;
456 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
462 s->ss_h =
s->ss_v = 0;
476 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
488 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
499 s->ss_h =
s->ss_v = 1;
500 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
511 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
513 const uint8_t *data2;
537 s->last_keyframe =
s->s.h.keyframe;
540 last_invisible =
s->s.h.invisible;
543 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
545 if (
s->s.h.keyframe) {
553 s->s.h.refreshrefmask = 0xff;
559 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
560 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
561 if (
s->s.h.intraonly) {
570 s->ss_h =
s->ss_v = 1;
573 s->bytesperpixel = 1;
586 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
588 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
590 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
591 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
592 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
593 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
598 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
599 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
601 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
602 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
604 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
605 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
613 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
620 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
621 s->s.h.signbias[0] !=
s->s.h.signbias[2];
622 if (
s->s.h.allowcompinter) {
623 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
624 s->s.h.fixcompref = 2;
625 s->s.h.varcompref[0] = 0;
626 s->s.h.varcompref[1] = 1;
627 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
628 s->s.h.fixcompref = 1;
629 s->s.h.varcompref[0] = 0;
630 s->s.h.varcompref[1] = 2;
632 s->s.h.fixcompref = 0;
633 s->s.h.varcompref[0] = 1;
634 s->s.h.varcompref[1] = 2;
639 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
640 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
642 if (
s->s.h.keyframe ||
s->s.h.intraonly)
643 s->s.h.framectxid = 0;
646 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
648 s->s.h.lf_delta.ref[0] = 1;
649 s->s.h.lf_delta.ref[1] = 0;
650 s->s.h.lf_delta.ref[2] = -1;
651 s->s.h.lf_delta.ref[3] = -1;
652 s->s.h.lf_delta.mode[0] = 0;
653 s->s.h.lf_delta.mode[1] = 0;
654 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
660 if (
s->s.h.filter.sharpness != sharp) {
661 for (
i = 1;
i <= 63;
i++) {
665 limit >>= (sharp + 3) >> 2;
670 s->filter_lut.lim_lut[
i] =
limit;
671 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
674 s->s.h.filter.sharpness = sharp;
675 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
676 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
677 for (
i = 0;
i < 4;
i++)
680 for (
i = 0;
i < 2;
i++)
691 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
692 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
697 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
698 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
699 for (
i = 0;
i < 7;
i++)
702 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
703 for (
i = 0;
i < 3;
i++)
709 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
710 for (
i = 0;
i < 8;
i++) {
711 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
713 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
715 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
716 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
717 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
723 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
724 int qyac, qydc, quvac, quvdc, lflvl, sh;
726 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
727 if (
s->s.h.segmentation.absolute_vals)
732 qyac =
s->s.h.yac_qi;
744 sh =
s->s.h.filter.level >= 32;
745 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
746 if (
s->s.h.segmentation.absolute_vals)
749 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
751 lflvl =
s->s.h.filter.level;
753 if (
s->s.h.lf_delta.enabled) {
754 s->s.h.segmentation.feat[
i].lflvl[0][0] =
755 s->s.h.segmentation.feat[
i].lflvl[0][1] =
757 for (j = 1; j < 4; j++) {
758 s->s.h.segmentation.feat[
i].lflvl[j][0] =
760 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
761 s->s.h.segmentation.feat[
i].lflvl[j][1] =
763 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
766 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
767 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
777 for (
s->s.h.tiling.log2_tile_cols = 0;
778 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
779 s->s.h.tiling.log2_tile_cols++) ;
780 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
782 while (
max >
s->s.h.tiling.log2_tile_cols) {
784 s->s.h.tiling.log2_tile_cols++;
789 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
790 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
795 for (
i = 0;
i <
s->active_tile_cols;
i++)
800 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
802 s->s.h.tiling.tile_cols : 1;
807 n_range_coders =
s->s.h.tiling.tile_cols;
814 for (
i = 0;
i <
s->active_tile_cols;
i++) {
817 rc += n_range_coders;
822 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
823 int valid_ref_frame = 0;
824 for (
i = 0;
i < 3;
i++) {
826 int refw =
ref->width, refh =
ref->height;
830 "Ref pixfmt (%s) did not match current frame (%s)",
834 }
else if (refw ==
w && refh ==
h) {
835 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
839 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
841 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
846 s->mvscale[
i][0] = (refw << 14) /
w;
847 s->mvscale[
i][1] = (refh << 14) /
h;
848 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
849 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
853 if (!valid_ref_frame) {
854 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
859 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
860 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
870 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
877 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
881 if (size2 >
size - (data2 -
data)) {
894 for (
i = 0;
i <
s->active_tile_cols;
i++) {
895 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
896 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
897 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
899 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
901 s->td[
i].nb_block_structure = 0;
907 s->prob.p =
s->prob_ctx[
c].p;
910 if (
s->s.h.lossless) {
914 if (
s->s.h.txfmmode == 3)
918 for (
i = 0;
i < 2;
i++)
921 for (
i = 0;
i < 2;
i++)
922 for (j = 0; j < 2; j++)
924 s->prob.p.tx16p[
i][j] =
926 for (
i = 0;
i < 2;
i++)
927 for (j = 0; j < 3; j++)
929 s->prob.p.tx32p[
i][j] =
935 for (
i = 0;
i < 4;
i++) {
936 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
938 for (j = 0; j < 2; j++)
939 for (k = 0; k < 2; k++)
940 for (l = 0; l < 6; l++)
941 for (m = 0; m < 6; m++) {
942 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
943 uint8_t *
r =
ref[j][k][l][m];
944 if (m >= 3 && l == 0)
946 for (n = 0; n < 3; n++) {
955 for (j = 0; j < 2; j++)
956 for (k = 0; k < 2; k++)
957 for (l = 0; l < 6; l++)
958 for (m = 0; m < 6; m++) {
959 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
960 uint8_t *
r =
ref[j][k][l][m];
967 if (
s->s.h.txfmmode ==
i)
972 for (
i = 0;
i < 3;
i++)
975 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
976 for (
i = 0;
i < 7;
i++)
977 for (j = 0; j < 3; j++)
979 s->prob.p.mv_mode[
i][j] =
983 for (
i = 0;
i < 4;
i++)
984 for (j = 0; j < 2; j++)
986 s->prob.p.filter[
i][j] =
989 for (
i = 0;
i < 4;
i++)
993 if (
s->s.h.allowcompinter) {
995 if (
s->s.h.comppredmode)
998 for (
i = 0;
i < 5;
i++)
1007 for (
i = 0;
i < 5;
i++) {
1009 s->prob.p.single_ref[
i][0] =
1012 s->prob.p.single_ref[
i][1] =
1018 for (
i = 0;
i < 5;
i++)
1020 s->prob.p.comp_ref[
i] =
1024 for (
i = 0;
i < 4;
i++)
1025 for (j = 0; j < 9; j++)
1027 s->prob.p.y_mode[
i][j] =
1030 for (
i = 0;
i < 4;
i++)
1031 for (j = 0; j < 4; j++)
1032 for (k = 0; k < 3; k++)
1034 s->prob.p.partition[3 -
i][j][k] =
1036 s->prob.p.partition[3 -
i][j][k]);
1039 for (
i = 0;
i < 3;
i++)
1043 for (
i = 0;
i < 2;
i++) {
1045 s->prob.p.mv_comp[
i].sign =
1048 for (j = 0; j < 10; j++)
1050 s->prob.p.mv_comp[
i].classes[j] =
1054 s->prob.p.mv_comp[
i].class0 =
1057 for (j = 0; j < 10; j++)
1059 s->prob.p.mv_comp[
i].bits[j] =
1063 for (
i = 0;
i < 2;
i++) {
1064 for (j = 0; j < 2; j++)
1065 for (k = 0; k < 3; k++)
1067 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1070 for (j = 0; j < 3; j++)
1072 s->prob.p.mv_comp[
i].fp[j] =
1076 if (
s->s.h.highprecisionmvs) {
1077 for (
i = 0;
i < 2;
i++) {
1079 s->prob.p.mv_comp[
i].class0_hp =
1083 s->prob.p.mv_comp[
i].hp =
1089 return (data2 -
data) + size2;
1093 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1096 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1097 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1099 s->prob.p.partition[bl][
c];
1101 ptrdiff_t hbs = 4 >> bl;
1103 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1104 int bytesperpixel =
s->bytesperpixel;
1109 }
else if (col + hbs < s->cols) {
1110 if (row + hbs < s->rows) {
1118 yoff += hbs * 8 * y_stride;
1119 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1124 yoff += hbs * 8 * bytesperpixel;
1125 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1129 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1131 yoff + 8 * hbs * bytesperpixel,
1132 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1133 yoff += hbs * 8 * y_stride;
1134 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1135 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1137 yoff + 8 * hbs * bytesperpixel,
1138 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1145 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1147 yoff + 8 * hbs * bytesperpixel,
1148 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1153 }
else if (row + hbs < s->rows) {
1156 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1157 yoff += hbs * 8 * y_stride;
1158 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1159 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1166 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1168 td->counts.partition[bl][
c][bp]++;
1172 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1176 ptrdiff_t hbs = 4 >> bl;
1178 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1179 int bytesperpixel =
s->bytesperpixel;
1184 }
else if (
td->b->bl == bl) {
1187 yoff += hbs * 8 * y_stride;
1188 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1190 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1191 yoff += hbs * 8 * bytesperpixel;
1192 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1197 if (col + hbs < s->cols) {
1198 if (row + hbs < s->rows) {
1199 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1200 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1201 yoff += hbs * 8 * y_stride;
1202 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1205 yoff + 8 * hbs * bytesperpixel,
1206 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1208 yoff += hbs * 8 * bytesperpixel;
1209 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1212 }
else if (row + hbs < s->rows) {
1213 yoff += hbs * 8 * y_stride;
1214 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1222 int sb_start = ( idx * n) >> log2_n;
1223 int sb_end = ((idx + 1) * n) >> log2_n;
1224 *start =
FFMIN(sb_start, n) << 3;
1225 *end =
FFMIN(sb_end, n) << 3;
1233 for (
i = 0;
i <
s->active_tile_cols;
i++)
1242 for (
i = 0;
i < 3;
i++) {
1247 for (
i = 0;
i < 8;
i++) {
1268 int row, col, tile_row, tile_col,
ret;
1270 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1272 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1275 ls_y =
f->linesize[0];
1276 ls_uv =
f->linesize[1];
1277 bytesperpixel =
s->bytesperpixel;
1280 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1282 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1284 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1287 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1288 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1295 if (tile_size >
size)
1306 for (row = tile_row_start; row < tile_row_end;
1307 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1309 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1311 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1313 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1314 td->tile_col_start = tile_col_start;
1316 memset(
td->left_partition_ctx, 0, 8);
1317 memset(
td->left_skip_ctx, 0, 8);
1318 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1323 memset(
td->left_y_nnz_ctx, 0, 16);
1324 memset(
td->left_uv_nnz_ctx, 0, 32);
1325 memset(
td->left_segpred_ctx, 0, 8);
1327 td->c = &
td->c_b[tile_col];
1330 for (col = tile_col_start;
1332 col += 8, yoff2 += 64 * bytesperpixel,
1333 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1337 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1358 if (row + 8 <
s->rows) {
1359 memcpy(
s->intra_pred_data[0],
1360 f->data[0] + yoff + 63 * ls_y,
1361 8 *
s->cols * bytesperpixel);
1362 memcpy(
s->intra_pred_data[1],
1363 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1364 8 *
s->cols * bytesperpixel >>
s->ss_h);
1365 memcpy(
s->intra_pred_data[2],
1366 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1367 8 *
s->cols * bytesperpixel >>
s->ss_h);
1371 if (
s->s.h.filter.level) {
1374 lflvl_ptr =
s->lflvl;
1375 for (col = 0; col <
s->cols;
1376 col += 8, yoff2 += 64 * bytesperpixel,
1377 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1394 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1399 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1400 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1401 unsigned tile_cols_len;
1402 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1407 ls_y =
f->linesize[0];
1408 ls_uv =
f->linesize[1];
1411 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1412 td->tile_col_start = tile_col_start;
1413 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1414 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1415 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1417 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1419 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1421 td->c = &
td->c_b[tile_row];
1422 for (row = tile_row_start; row < tile_row_end;
1423 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1424 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1425 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1427 memset(
td->left_partition_ctx, 0, 8);
1428 memset(
td->left_skip_ctx, 0, 8);
1429 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1434 memset(
td->left_y_nnz_ctx, 0, 16);
1435 memset(
td->left_uv_nnz_ctx, 0, 32);
1436 memset(
td->left_segpred_ctx, 0, 8);
1438 for (col = tile_col_start;
1440 col += 8, yoff2 += 64 * bytesperpixel,
1441 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1444 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1451 tile_cols_len = tile_col_end - tile_col_start;
1452 if (row + 8 <
s->rows) {
1453 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1454 f->data[0] + yoff + 63 * ls_y,
1455 8 * tile_cols_len * bytesperpixel);
1456 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1457 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1458 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1459 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1460 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1461 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1464 vp9_report_tile_progress(
s, row >> 3, 1);
1474 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1476 int bytesperpixel =
s->bytesperpixel, col,
i;
1480 ls_y =
f->linesize[0];
1481 ls_uv =
f->linesize[1];
1483 for (
i = 0;
i <
s->sb_rows;
i++) {
1484 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1486 if (
s->s.h.filter.level) {
1487 yoff = (ls_y * 64)*
i;
1488 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1489 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1490 for (col = 0; col <
s->cols;
1491 col += 8, yoff += 64 * bytesperpixel,
1492 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1505 unsigned int tile, nb_blocks = 0;
1507 if (
s->s.h.segmentation.enabled) {
1508 for (tile = 0; tile <
s->active_tile_cols; tile++)
1509 nb_blocks +=
s->td[tile].nb_block_structure;
1517 par->
qp =
s->s.h.yac_qi;
1518 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1519 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1520 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1521 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1522 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1525 unsigned int block = 0;
1526 unsigned int tile, block_tile;
1528 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1531 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1533 unsigned int row =
td->block_structure[block_tile].row;
1534 unsigned int col =
td->block_structure[block_tile].col;
1535 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1539 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1540 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1542 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1543 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1544 if (
s->s.h.segmentation.absolute_vals)
1545 b->delta_qp -= par->
qp;
1562 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1567 }
else if (
ret == 0) {
1568 if (!
s->s.refs[
ref].f->buf[0]) {
1576 for (
i = 0;
i < 8;
i++) {
1577 if (
s->next_refs[
i].f->buf[0])
1579 if (
s->s.refs[
i].f->buf[0] &&
1589 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1592 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1598 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1606 if (
s->s.h.keyframe)
1619 for (
i = 0;
i < 8;
i++) {
1620 if (
s->next_refs[
i].f->buf[0])
1622 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1624 }
else if (
s->s.refs[
i].f->buf[0]) {
1646 memset(
s->above_partition_ctx, 0,
s->cols);
1647 memset(
s->above_skip_ctx, 0,
s->cols);
1648 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1649 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1653 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1654 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1655 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1656 memset(
s->above_segpred_ctx, 0,
s->cols);
1661 "Failed to allocate block buffers\n");
1664 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1667 for (
i = 0;
i < 4;
i++) {
1668 for (j = 0; j < 2; j++)
1669 for (k = 0; k < 2; k++)
1670 for (l = 0; l < 6; l++)
1671 for (m = 0; m < 6; m++)
1672 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1673 s->prob.coef[
i][j][k][l][m], 3);
1674 if (
s->s.h.txfmmode ==
i)
1677 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1679 }
else if (!
s->s.h.refreshctx) {
1685 for (
i = 0;
i <
s->sb_rows;
i++)
1691 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1692 s->td[
i].b =
s->td[
i].b_base;
1693 s->td[
i].block =
s->td[
i].block_base;
1694 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1695 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1696 s->td[
i].eob =
s->td[
i].eob_base;
1697 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1698 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1699 s->td[
i].error_info = 0;
1704 int tile_row, tile_col;
1708 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1709 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1712 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1713 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1720 if (tile_size >
size)
1745 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1746 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1747 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1749 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1753 }
while (
s->pass++ == 1);
1756 if (
s->td->error_info < 0) {
1758 s->td->error_info = 0;
1769 for (
i = 0;
i < 8;
i++) {
1770 if (
s->s.refs[
i].f->buf[0])
1772 if (
s->next_refs[
i].f->buf[0] &&
1777 if (!
s->s.h.invisible) {
1791 for (
i = 0;
i < 3;
i++)
1793 for (
i = 0;
i < 8;
i++)
1806 s->s.h.filter.sharpness = -1;
1816 for (
int i = 0;
i < 3;
i++) {
1818 if (!
s->s.frames[
i].tf.f)
1821 for (
int i = 0;
i < 8;
i++) {
1824 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1836 for (
i = 0;
i < 3;
i++) {
1837 if (
s->s.frames[
i].tf.f->buf[0])
1839 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1844 for (
i = 0;
i < 8;
i++) {
1845 if (
s->s.refs[
i].f->buf[0])
1847 if (ssrc->next_refs[
i].f->buf[0]) {
1853 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1855 s->s.h.invisible = ssrc->s.h.invisible;
1856 s->s.h.keyframe = ssrc->s.h.keyframe;
1857 s->s.h.intraonly = ssrc->s.h.intraonly;
1858 s->ss_v = ssrc->ss_v;
1859 s->ss_h = ssrc->ss_h;
1860 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1861 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1862 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1863 s->bytesperpixel = ssrc->bytesperpixel;
1864 s->gf_fmt = ssrc->gf_fmt;
1867 s->s.h.bpp = ssrc->s.h.bpp;
1868 s->bpp_index = ssrc->bpp_index;
1869 s->pix_fmt = ssrc->pix_fmt;
1870 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1871 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1872 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1873 sizeof(
s->s.h.segmentation.feat));
1895 .bsfs =
"vp9_superframe_split",
1897 #if CONFIG_VP9_DXVA2_HWACCEL
1900 #if CONFIG_VP9_D3D11VA_HWACCEL
1903 #if CONFIG_VP9_D3D11VA2_HWACCEL
1906 #if CONFIG_VP9_D3D12VA_HWACCEL
1909 #if CONFIG_VP9_NVDEC_HWACCEL
1912 #if CONFIG_VP9_VAAPI_HWACCEL
1915 #if CONFIG_VP9_VDPAU_HWACCEL
1918 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL