24 #include "config_components.h"
/* 24-bit VP9 sync code: the three bytes 0x49 0x83 0x42 that must appear
 * in the uncompressed header of keyframes / intra-only frames. */
#define VP9_SYNCCODE 0x498342
65 for (
i = 0;
i < n;
i++)
103 f->segmentation_map =
NULL;
104 f->hwaccel_picture_private =
NULL;
116 sz = 64 *
s->sb_cols *
s->sb_rows;
117 if (sz !=
s->frame_extradata_pool_size) {
120 if (!
s->frame_extradata_pool) {
121 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
130 memset(
f->extradata->data, 0,
f->extradata->size);
132 f->segmentation_map =
f->extradata->data;
140 if (!
f->hwaccel_priv_buf)
142 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
169 if (
src->hwaccel_picture_private) {
/* Compile-time upper bound on the number of hardware-accelerated pixel
 * formats this decoder can offer. Each enabled hwaccel contributes one
 * candidate, except D3D11VA which contributes two (hence the "* 2" --
 * it covers both D3D11 API variants).
 * NOTE(review): presumably used to size the pix_fmt candidate list passed
 * to ff_get_format(); the use site is not visible here -- confirm. */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
194 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
199 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
203 switch (
s->pix_fmt) {
206 #if CONFIG_VP9_DXVA2_HWACCEL
209 #if CONFIG_VP9_D3D11VA_HWACCEL
213 #if CONFIG_VP9_NVDEC_HWACCEL
216 #if CONFIG_VP9_VAAPI_HWACCEL
219 #if CONFIG_VP9_VDPAU_HWACCEL
222 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
227 #if CONFIG_VP9_NVDEC_HWACCEL
230 #if CONFIG_VP9_VAAPI_HWACCEL
233 #if CONFIG_VP9_VDPAU_HWACCEL
238 #if CONFIG_VP9_VAAPI_HWACCEL
244 *fmtp++ =
s->pix_fmt;
252 s->gf_fmt =
s->pix_fmt;
260 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
263 s->last_fmt =
s->pix_fmt;
264 s->sb_cols = (
w + 63) >> 6;
265 s->sb_rows = (
h + 63) >> 6;
266 s->cols = (
w + 7) >> 3;
267 s->rows = (
h + 7) >> 3;
/* Carve a per-superblock-column sub-array for 'var' out of the single flat
 * buffer tracked by the cursor 'p': cast the current cursor position to
 * 'type', then advance the cursor by s->sb_cols * (n) * sizeof(*var) bytes.
 * CAUTION: this expands to TWO statements and is not wrapped in
 * do { } while (0), so it is only safe when invoked as a plain statement
 * (never as the un-braced body of an if/else) -- which is how the
 * surrounding allocation code uses it. */
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
274 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
275 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
278 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
279 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
280 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
281 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
282 assign(
s->above_mode_ctx, uint8_t *, 16);
284 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
285 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
286 assign(
s->above_partition_ctx, uint8_t *, 8);
287 assign(
s->above_skip_ctx, uint8_t *, 8);
288 assign(
s->above_txfm_ctx, uint8_t *, 8);
289 assign(
s->above_segpred_ctx, uint8_t *, 8);
290 assign(
s->above_intra_ctx, uint8_t *, 8);
291 assign(
s->above_comp_ctx, uint8_t *, 8);
292 assign(
s->above_ref_ctx, uint8_t *, 8);
293 assign(
s->above_filter_ctx, uint8_t *, 8);
298 for (
i = 0;
i <
s->active_tile_cols;
i++)
302 if (
s->s.h.bpp !=
s->last_bpp) {
305 s->last_bpp =
s->s.h.bpp;
315 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
318 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
322 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
323 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
325 int sbs =
s->sb_cols *
s->sb_rows;
328 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
329 16 * 16 + 2 * chroma_eobs) * sbs);
330 if (!
td->b_base || !
td->block_base)
332 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
333 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
334 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
335 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
336 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
340 if (!
td->block_structure)
344 for (
i = 1;
i <
s->active_tile_cols;
i++)
347 for (
i = 0;
i <
s->active_tile_cols;
i++) {
349 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
350 16 * 16 + 2 * chroma_eobs);
351 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
353 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
354 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
355 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
356 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
357 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
361 if (!
s->td[
i].block_structure)
366 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
383 return m - ((v + 1) >> 1);
390 static const uint8_t inv_map_table[255] = {
391 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
392 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
393 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
394 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
395 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
396 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
397 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
398 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
399 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
400 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
401 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
402 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
403 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
404 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
405 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
406 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
407 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
408 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
456 s->s.h.bpp = 8 +
bits * 2;
457 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
463 s->ss_h =
s->ss_v = 0;
477 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
489 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
500 s->ss_h =
s->ss_v = 1;
501 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
512 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
514 const uint8_t *data2;
538 s->last_keyframe =
s->s.h.keyframe;
541 last_invisible =
s->s.h.invisible;
544 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
546 if (
s->s.h.keyframe) {
554 s->s.h.refreshrefmask = 0xff;
560 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
561 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
562 if (
s->s.h.intraonly) {
571 s->ss_h =
s->ss_v = 1;
574 s->bytesperpixel = 1;
587 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
589 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
591 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
592 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
593 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
594 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
599 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
600 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
602 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
603 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
605 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
606 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
614 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
621 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
622 s->s.h.signbias[0] !=
s->s.h.signbias[2];
623 if (
s->s.h.allowcompinter) {
624 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
625 s->s.h.fixcompref = 2;
626 s->s.h.varcompref[0] = 0;
627 s->s.h.varcompref[1] = 1;
628 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
629 s->s.h.fixcompref = 1;
630 s->s.h.varcompref[0] = 0;
631 s->s.h.varcompref[1] = 2;
633 s->s.h.fixcompref = 0;
634 s->s.h.varcompref[0] = 1;
635 s->s.h.varcompref[1] = 2;
640 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
641 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
643 if (
s->s.h.keyframe ||
s->s.h.intraonly)
644 s->s.h.framectxid = 0;
647 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
649 s->s.h.lf_delta.ref[0] = 1;
650 s->s.h.lf_delta.ref[1] = 0;
651 s->s.h.lf_delta.ref[2] = -1;
652 s->s.h.lf_delta.ref[3] = -1;
653 s->s.h.lf_delta.mode[0] = 0;
654 s->s.h.lf_delta.mode[1] = 0;
655 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
661 if (
s->s.h.filter.sharpness != sharp) {
662 for (
i = 1;
i <= 63;
i++) {
666 limit >>= (sharp + 3) >> 2;
671 s->filter_lut.lim_lut[
i] =
limit;
672 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
675 s->s.h.filter.sharpness = sharp;
676 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
677 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
678 for (
i = 0;
i < 4;
i++)
681 for (
i = 0;
i < 2;
i++)
692 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
693 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
698 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
699 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
700 for (
i = 0;
i < 7;
i++)
703 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
704 for (
i = 0;
i < 3;
i++)
710 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
711 for (
i = 0;
i < 8;
i++) {
712 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
714 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
716 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
717 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
718 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
724 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
725 int qyac, qydc, quvac, quvdc, lflvl, sh;
727 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
728 if (
s->s.h.segmentation.absolute_vals)
733 qyac =
s->s.h.yac_qi;
745 sh =
s->s.h.filter.level >= 32;
746 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
747 if (
s->s.h.segmentation.absolute_vals)
750 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
752 lflvl =
s->s.h.filter.level;
754 if (
s->s.h.lf_delta.enabled) {
755 s->s.h.segmentation.feat[
i].lflvl[0][0] =
756 s->s.h.segmentation.feat[
i].lflvl[0][1] =
758 for (j = 1; j < 4; j++) {
759 s->s.h.segmentation.feat[
i].lflvl[j][0] =
761 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
762 s->s.h.segmentation.feat[
i].lflvl[j][1] =
764 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
767 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
768 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
778 for (
s->s.h.tiling.log2_tile_cols = 0;
779 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
780 s->s.h.tiling.log2_tile_cols++) ;
781 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
783 while (
max >
s->s.h.tiling.log2_tile_cols) {
785 s->s.h.tiling.log2_tile_cols++;
790 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
791 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
796 for (
i = 0;
i <
s->active_tile_cols;
i++)
801 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
803 s->s.h.tiling.tile_cols : 1;
808 n_range_coders =
s->s.h.tiling.tile_cols;
815 for (
i = 0;
i <
s->active_tile_cols;
i++) {
818 rc += n_range_coders;
823 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
824 int valid_ref_frame = 0;
825 for (
i = 0;
i < 3;
i++) {
827 int refw =
ref->width, refh =
ref->height;
831 "Ref pixfmt (%s) did not match current frame (%s)",
835 }
else if (refw ==
w && refh ==
h) {
836 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
840 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
842 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
847 s->mvscale[
i][0] = (refw << 14) /
w;
848 s->mvscale[
i][1] = (refh << 14) /
h;
849 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
850 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
854 if (!valid_ref_frame) {
855 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
860 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
861 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
871 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
878 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
882 if (size2 >
size - (data2 -
data)) {
895 for (
i = 0;
i <
s->active_tile_cols;
i++) {
896 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
897 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
898 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
900 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
902 s->td[
i].nb_block_structure = 0;
908 s->prob.p =
s->prob_ctx[
c].p;
911 if (
s->s.h.lossless) {
915 if (
s->s.h.txfmmode == 3)
919 for (
i = 0;
i < 2;
i++)
922 for (
i = 0;
i < 2;
i++)
923 for (j = 0; j < 2; j++)
925 s->prob.p.tx16p[
i][j] =
927 for (
i = 0;
i < 2;
i++)
928 for (j = 0; j < 3; j++)
930 s->prob.p.tx32p[
i][j] =
936 for (
i = 0;
i < 4;
i++) {
937 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
939 for (j = 0; j < 2; j++)
940 for (k = 0; k < 2; k++)
941 for (l = 0; l < 6; l++)
942 for (m = 0; m < 6; m++) {
943 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
944 uint8_t *
r =
ref[j][k][l][m];
945 if (m >= 3 && l == 0)
947 for (n = 0; n < 3; n++) {
956 for (j = 0; j < 2; j++)
957 for (k = 0; k < 2; k++)
958 for (l = 0; l < 6; l++)
959 for (m = 0; m < 6; m++) {
960 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
961 uint8_t *
r =
ref[j][k][l][m];
968 if (
s->s.h.txfmmode ==
i)
973 for (
i = 0;
i < 3;
i++)
976 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
977 for (
i = 0;
i < 7;
i++)
978 for (j = 0; j < 3; j++)
980 s->prob.p.mv_mode[
i][j] =
984 for (
i = 0;
i < 4;
i++)
985 for (j = 0; j < 2; j++)
987 s->prob.p.filter[
i][j] =
990 for (
i = 0;
i < 4;
i++)
994 if (
s->s.h.allowcompinter) {
996 if (
s->s.h.comppredmode)
999 for (
i = 0;
i < 5;
i++)
1008 for (
i = 0;
i < 5;
i++) {
1010 s->prob.p.single_ref[
i][0] =
1013 s->prob.p.single_ref[
i][1] =
1019 for (
i = 0;
i < 5;
i++)
1021 s->prob.p.comp_ref[
i] =
1025 for (
i = 0;
i < 4;
i++)
1026 for (j = 0; j < 9; j++)
1028 s->prob.p.y_mode[
i][j] =
1031 for (
i = 0;
i < 4;
i++)
1032 for (j = 0; j < 4; j++)
1033 for (k = 0; k < 3; k++)
1035 s->prob.p.partition[3 -
i][j][k] =
1037 s->prob.p.partition[3 -
i][j][k]);
1040 for (
i = 0;
i < 3;
i++)
1044 for (
i = 0;
i < 2;
i++) {
1046 s->prob.p.mv_comp[
i].sign =
1049 for (j = 0; j < 10; j++)
1051 s->prob.p.mv_comp[
i].classes[j] =
1055 s->prob.p.mv_comp[
i].class0 =
1058 for (j = 0; j < 10; j++)
1060 s->prob.p.mv_comp[
i].bits[j] =
1064 for (
i = 0;
i < 2;
i++) {
1065 for (j = 0; j < 2; j++)
1066 for (k = 0; k < 3; k++)
1068 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1071 for (j = 0; j < 3; j++)
1073 s->prob.p.mv_comp[
i].fp[j] =
1077 if (
s->s.h.highprecisionmvs) {
1078 for (
i = 0;
i < 2;
i++) {
1080 s->prob.p.mv_comp[
i].class0_hp =
1084 s->prob.p.mv_comp[
i].hp =
1090 return (data2 -
data) + size2;
1094 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1097 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1098 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1100 s->prob.p.partition[bl][
c];
1102 ptrdiff_t hbs = 4 >> bl;
1104 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1105 int bytesperpixel =
s->bytesperpixel;
1110 }
else if (col + hbs < s->cols) {
1111 if (row + hbs < s->rows) {
1119 yoff += hbs * 8 * y_stride;
1120 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1125 yoff += hbs * 8 * bytesperpixel;
1126 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1130 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1132 yoff + 8 * hbs * bytesperpixel,
1133 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1134 yoff += hbs * 8 * y_stride;
1135 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1136 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1138 yoff + 8 * hbs * bytesperpixel,
1139 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1146 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1148 yoff + 8 * hbs * bytesperpixel,
1149 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1154 }
else if (row + hbs < s->rows) {
1157 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1158 yoff += hbs * 8 * y_stride;
1159 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1160 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1167 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1169 td->counts.partition[bl][
c][bp]++;
1173 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1177 ptrdiff_t hbs = 4 >> bl;
1179 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1180 int bytesperpixel =
s->bytesperpixel;
1185 }
else if (
td->b->bl == bl) {
1188 yoff += hbs * 8 * y_stride;
1189 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1191 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1192 yoff += hbs * 8 * bytesperpixel;
1193 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1198 if (col + hbs < s->cols) {
1199 if (row + hbs < s->rows) {
1200 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1201 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1202 yoff += hbs * 8 * y_stride;
1203 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1206 yoff + 8 * hbs * bytesperpixel,
1207 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1209 yoff += hbs * 8 * bytesperpixel;
1210 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1213 }
else if (row + hbs < s->rows) {
1214 yoff += hbs * 8 * y_stride;
1215 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1223 int sb_start = ( idx * n) >> log2_n;
1224 int sb_end = ((idx + 1) * n) >> log2_n;
1225 *start =
FFMIN(sb_start, n) << 3;
1226 *end =
FFMIN(sb_end, n) << 3;
1234 for (
i = 0;
i <
s->active_tile_cols;
i++)
1243 for (
i = 0;
i < 3;
i++) {
1248 for (
i = 0;
i < 8;
i++) {
1269 int row, col, tile_row, tile_col,
ret;
1271 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1273 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1276 ls_y =
f->linesize[0];
1277 ls_uv =
f->linesize[1];
1278 bytesperpixel =
s->bytesperpixel;
1281 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1283 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1285 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1288 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1289 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1296 if (tile_size >
size) {
1311 for (row = tile_row_start; row < tile_row_end;
1312 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1314 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1316 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1318 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1319 td->tile_col_start = tile_col_start;
1321 memset(
td->left_partition_ctx, 0, 8);
1322 memset(
td->left_skip_ctx, 0, 8);
1323 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1328 memset(
td->left_y_nnz_ctx, 0, 16);
1329 memset(
td->left_uv_nnz_ctx, 0, 32);
1330 memset(
td->left_segpred_ctx, 0, 8);
1332 td->c = &
td->c_b[tile_col];
1335 for (col = tile_col_start;
1337 col += 8, yoff2 += 64 * bytesperpixel,
1338 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1342 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1363 if (row + 8 <
s->rows) {
1364 memcpy(
s->intra_pred_data[0],
1365 f->data[0] + yoff + 63 * ls_y,
1366 8 *
s->cols * bytesperpixel);
1367 memcpy(
s->intra_pred_data[1],
1368 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1369 8 *
s->cols * bytesperpixel >>
s->ss_h);
1370 memcpy(
s->intra_pred_data[2],
1371 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1372 8 *
s->cols * bytesperpixel >>
s->ss_h);
1376 if (
s->s.h.filter.level) {
1379 lflvl_ptr =
s->lflvl;
1380 for (col = 0; col <
s->cols;
1381 col += 8, yoff2 += 64 * bytesperpixel,
1382 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1399 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1404 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1405 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1406 unsigned tile_cols_len;
1407 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1412 ls_y =
f->linesize[0];
1413 ls_uv =
f->linesize[1];
1416 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1417 td->tile_col_start = tile_col_start;
1418 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1419 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1420 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1422 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1424 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1426 td->c = &
td->c_b[tile_row];
1427 for (row = tile_row_start; row < tile_row_end;
1428 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1429 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1430 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1432 memset(
td->left_partition_ctx, 0, 8);
1433 memset(
td->left_skip_ctx, 0, 8);
1434 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1439 memset(
td->left_y_nnz_ctx, 0, 16);
1440 memset(
td->left_uv_nnz_ctx, 0, 32);
1441 memset(
td->left_segpred_ctx, 0, 8);
1443 for (col = tile_col_start;
1445 col += 8, yoff2 += 64 * bytesperpixel,
1446 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1449 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1456 tile_cols_len = tile_col_end - tile_col_start;
1457 if (row + 8 <
s->rows) {
1458 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1459 f->data[0] + yoff + 63 * ls_y,
1460 8 * tile_cols_len * bytesperpixel);
1461 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1462 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1463 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1464 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1465 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1466 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1469 vp9_report_tile_progress(
s, row >> 3, 1);
1479 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1481 int bytesperpixel =
s->bytesperpixel, col,
i;
1485 ls_y =
f->linesize[0];
1486 ls_uv =
f->linesize[1];
1488 for (
i = 0;
i <
s->sb_rows;
i++) {
1489 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1491 if (
s->s.h.filter.level) {
1492 yoff = (ls_y * 64)*
i;
1493 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1494 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1495 for (col = 0; col <
s->cols;
1496 col += 8, yoff += 64 * bytesperpixel,
1497 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1510 unsigned int tile, nb_blocks = 0;
1512 if (
s->s.h.segmentation.enabled) {
1513 for (tile = 0; tile <
s->active_tile_cols; tile++)
1514 nb_blocks +=
s->td[tile].nb_block_structure;
1522 par->
qp =
s->s.h.yac_qi;
1523 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1524 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1525 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1526 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1527 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1530 unsigned int block = 0;
1531 unsigned int tile, block_tile;
1533 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1536 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1538 unsigned int row =
td->block_structure[block_tile].row;
1539 unsigned int col =
td->block_structure[block_tile].col;
1540 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1544 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1545 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1547 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1548 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1549 if (
s->s.h.segmentation.absolute_vals)
1550 b->delta_qp -= par->
qp;
1567 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1572 }
else if (
ret == 0) {
1573 if (!
s->s.refs[
ref].f->buf[0]) {
1581 for (
i = 0;
i < 8;
i++) {
1582 if (
s->next_refs[
i].f->buf[0])
1584 if (
s->s.refs[
i].f->buf[0] &&
1594 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1597 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1603 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1611 f->key_frame =
s->s.h.keyframe;
1621 for (
i = 0;
i < 8;
i++) {
1622 if (
s->next_refs[
i].f->buf[0])
1624 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1626 }
else if (
s->s.refs[
i].f->buf[0]) {
1647 memset(
s->above_partition_ctx, 0,
s->cols);
1648 memset(
s->above_skip_ctx, 0,
s->cols);
1649 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1650 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1654 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1655 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1656 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1657 memset(
s->above_segpred_ctx, 0,
s->cols);
1662 "Failed to allocate block buffers\n");
1665 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1668 for (
i = 0;
i < 4;
i++) {
1669 for (j = 0; j < 2; j++)
1670 for (k = 0; k < 2; k++)
1671 for (l = 0; l < 6; l++)
1672 for (m = 0; m < 6; m++)
1673 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1674 s->prob.coef[
i][j][k][l][m], 3);
1675 if (
s->s.h.txfmmode ==
i)
1678 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1680 }
else if (!
s->s.h.refreshctx) {
1686 for (
i = 0;
i <
s->sb_rows;
i++)
1692 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1693 s->td[
i].b =
s->td[
i].b_base;
1694 s->td[
i].block =
s->td[
i].block_base;
1695 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1696 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1697 s->td[
i].eob =
s->td[
i].eob_base;
1698 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1699 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1700 s->td[
i].error_info = 0;
1705 int tile_row, tile_col;
1709 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1710 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1713 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1714 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1721 if (tile_size >
size)
1746 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1747 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1748 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1750 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1754 }
while (
s->pass++ == 1);
1757 if (
s->td->error_info < 0) {
1759 s->td->error_info = 0;
1770 for (
i = 0;
i < 8;
i++) {
1771 if (
s->s.refs[
i].f->buf[0])
1773 if (
s->next_refs[
i].f->buf[0] &&
1778 if (!
s->s.h.invisible) {
1792 for (
i = 0;
i < 3;
i++)
1794 for (
i = 0;
i < 8;
i++)
1804 s->s.h.filter.sharpness = -1;
1814 for (
int i = 0;
i < 3;
i++) {
1816 if (!
s->s.frames[
i].tf.f)
1819 for (
int i = 0;
i < 8;
i++) {
1822 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1834 for (
i = 0;
i < 3;
i++) {
1835 if (
s->s.frames[
i].tf.f->buf[0])
1837 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1842 for (
i = 0;
i < 8;
i++) {
1843 if (
s->s.refs[
i].f->buf[0])
1845 if (ssrc->next_refs[
i].f->buf[0]) {
1851 s->s.h.invisible = ssrc->s.h.invisible;
1852 s->s.h.keyframe = ssrc->s.h.keyframe;
1853 s->s.h.intraonly = ssrc->s.h.intraonly;
1854 s->ss_v = ssrc->ss_v;
1855 s->ss_h = ssrc->ss_h;
1856 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1857 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1858 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1859 s->bytesperpixel = ssrc->bytesperpixel;
1860 s->gf_fmt = ssrc->gf_fmt;
1863 s->s.h.bpp = ssrc->s.h.bpp;
1864 s->bpp_index = ssrc->bpp_index;
1865 s->pix_fmt = ssrc->pix_fmt;
1866 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1867 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1868 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1869 sizeof(
s->s.h.segmentation.feat));
1891 .bsfs =
"vp9_superframe_split",
1893 #if CONFIG_VP9_DXVA2_HWACCEL
1896 #if CONFIG_VP9_D3D11VA_HWACCEL
1899 #if CONFIG_VP9_D3D11VA2_HWACCEL
1902 #if CONFIG_VP9_NVDEC_HWACCEL
1905 #if CONFIG_VP9_VAAPI_HWACCEL
1908 #if CONFIG_VP9_VDPAU_HWACCEL
1911 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL