/* 24-bit VP9 frame sync code (0x49, 0x83, 0x42) that must appear in the
 * uncompressed header of keyframes and intra-only frames before the
 * colour-config fields can be parsed. */
#define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
109 f->segmentation_map =
NULL;
110 f->hwaccel_picture_private =
NULL;
122 sz = 64 *
s->sb_cols *
s->sb_rows;
123 if (sz !=
s->frame_extradata_pool_size) {
126 if (!
s->frame_extradata_pool) {
127 s->frame_extradata_pool_size = 0;
130 s->frame_extradata_pool_size = sz;
136 memset(
f->extradata->data, 0,
f->extradata->size);
138 f->segmentation_map =
f->extradata->data;
146 if (!
f->hwaccel_priv_buf)
148 f->hwaccel_picture_private =
f->hwaccel_priv_buf->data;
175 if (
src->hwaccel_picture_private) {
/* Upper bound on the number of hardware-accelerated pixel formats this
 * decoder may offer in its get_format list.  Each CONFIG_VP9_*_HWACCEL
 * flag is 0 or 1; D3D11VA is counted twice because it contributes two
 * pixel formats when enabled. */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL)
199 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
204 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
208 switch (
s->pix_fmt) {
210 #if CONFIG_VP9_VDPAU_HWACCEL
214 #if CONFIG_VP9_DXVA2_HWACCEL
217 #if CONFIG_VP9_D3D11VA_HWACCEL
221 #if CONFIG_VP9_NVDEC_HWACCEL
224 #if CONFIG_VP9_VAAPI_HWACCEL
229 #if CONFIG_VP9_NVDEC_HWACCEL
232 #if CONFIG_VP9_VAAPI_HWACCEL
238 *fmtp++ =
s->pix_fmt;
246 s->gf_fmt =
s->pix_fmt;
254 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
257 s->last_fmt =
s->pix_fmt;
258 s->sb_cols = (
w + 63) >> 6;
259 s->sb_rows = (
h + 63) >> 6;
260 s->cols = (
w + 7) >> 3;
261 s->rows = (
h + 7) >> 3;
264 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
268 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
269 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
292 for (
i = 0;
i <
s->active_tile_cols;
i++)
296 if (
s->s.h.bpp !=
s->last_bpp) {
299 s->last_bpp =
s->s.h.bpp;
309 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
312 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
316 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
317 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
319 int sbs =
s->sb_cols *
s->sb_rows;
322 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
323 16 * 16 + 2 * chroma_eobs) * sbs);
324 if (!
td->b_base || !
td->block_base)
326 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
327 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
328 td->eob_base = (
uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
329 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
330 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
334 if (!
td->block_structure)
338 for (
i = 1;
i <
s->active_tile_cols;
i++)
341 for (
i = 0;
i <
s->active_tile_cols;
i++) {
343 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
344 16 * 16 + 2 * chroma_eobs);
345 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
347 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
348 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
349 s->td[
i].eob_base = (
uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
350 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
351 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
355 if (!
s->td[
i].block_structure)
360 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
377 return m - ((v + 1) >> 1);
384 static const uint8_t inv_map_table[255] = {
385 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
386 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
387 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
388 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
389 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
390 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
391 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
392 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
393 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
394 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
395 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
396 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
397 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
398 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
399 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
400 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
401 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
402 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
450 s->s.h.bpp = 8 +
bits * 2;
451 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
457 s->ss_h =
s->ss_v = 0;
471 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
483 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
494 s->ss_h =
s->ss_v = 1;
495 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
506 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
532 s->last_keyframe =
s->s.h.keyframe;
535 last_invisible =
s->s.h.invisible;
538 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
540 if (
s->s.h.keyframe) {
548 s->s.h.refreshrefmask = 0xff;
554 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
555 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
556 if (
s->s.h.intraonly) {
565 s->ss_h =
s->ss_v = 1;
568 s->bytesperpixel = 1;
581 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
583 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
585 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
586 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
587 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
588 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
593 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
594 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
596 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
597 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
599 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
600 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
608 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
615 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
616 s->s.h.signbias[0] !=
s->s.h.signbias[2];
617 if (
s->s.h.allowcompinter) {
618 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
619 s->s.h.fixcompref = 2;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 1;
622 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
623 s->s.h.fixcompref = 1;
624 s->s.h.varcompref[0] = 0;
625 s->s.h.varcompref[1] = 2;
627 s->s.h.fixcompref = 0;
628 s->s.h.varcompref[0] = 1;
629 s->s.h.varcompref[1] = 2;
634 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
635 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
637 if (
s->s.h.keyframe ||
s->s.h.intraonly)
638 s->s.h.framectxid = 0;
641 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
643 s->s.h.lf_delta.ref[0] = 1;
644 s->s.h.lf_delta.ref[1] = 0;
645 s->s.h.lf_delta.ref[2] = -1;
646 s->s.h.lf_delta.ref[3] = -1;
647 s->s.h.lf_delta.mode[0] = 0;
648 s->s.h.lf_delta.mode[1] = 0;
649 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
655 if (
s->s.h.filter.sharpness != sharp) {
656 for (
i = 1;
i <= 63;
i++) {
660 limit >>= (sharp + 3) >> 2;
661 limit =
FFMIN(limit, 9 - sharp);
663 limit =
FFMAX(limit, 1);
665 s->filter_lut.lim_lut[
i] = limit;
666 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) + limit;
669 s->s.h.filter.sharpness = sharp;
670 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
671 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
672 for (
i = 0;
i < 4;
i++)
675 for (
i = 0;
i < 2;
i++)
686 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
687 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
692 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
693 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
694 for (
i = 0;
i < 7;
i++)
697 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
698 for (
i = 0;
i < 3;
i++)
704 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
705 for (
i = 0;
i < 8;
i++) {
706 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
708 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
710 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
711 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
712 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
718 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
719 int qyac, qydc, quvac, quvdc, lflvl, sh;
721 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
722 if (
s->s.h.segmentation.absolute_vals)
723 qyac = av_clip_uintp2(
s->s.h.segmentation.feat[
i].q_val, 8);
725 qyac = av_clip_uintp2(
s->s.h.yac_qi +
s->s.h.segmentation.feat[
i].q_val, 8);
727 qyac =
s->s.h.yac_qi;
729 qydc = av_clip_uintp2(qyac +
s->s.h.ydc_qdelta, 8);
730 quvdc = av_clip_uintp2(qyac +
s->s.h.uvdc_qdelta, 8);
731 quvac = av_clip_uintp2(qyac +
s->s.h.uvac_qdelta, 8);
732 qyac = av_clip_uintp2(qyac, 8);
739 sh =
s->s.h.filter.level >= 32;
740 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
741 if (
s->s.h.segmentation.absolute_vals)
742 lflvl = av_clip_uintp2(
s->s.h.segmentation.feat[
i].lf_val, 6);
744 lflvl = av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
746 lflvl =
s->s.h.filter.level;
748 if (
s->s.h.lf_delta.enabled) {
749 s->s.h.segmentation.feat[
i].lflvl[0][0] =
750 s->s.h.segmentation.feat[
i].lflvl[0][1] =
751 av_clip_uintp2(lflvl + (
s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
752 for (j = 1; j < 4; j++) {
753 s->s.h.segmentation.feat[
i].lflvl[j][0] =
754 av_clip_uintp2(lflvl + ((
s->s.h.lf_delta.ref[j] +
755 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
756 s->s.h.segmentation.feat[
i].lflvl[j][1] =
757 av_clip_uintp2(lflvl + ((
s->s.h.lf_delta.ref[j] +
758 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
761 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
762 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
772 for (
s->s.h.tiling.log2_tile_cols = 0;
773 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
774 s->s.h.tiling.log2_tile_cols++) ;
775 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
777 while (
max >
s->s.h.tiling.log2_tile_cols) {
779 s->s.h.tiling.log2_tile_cols++;
784 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
785 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
790 for (
i = 0;
i <
s->active_tile_cols;
i++)
795 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
798 s->s.h.tiling.tile_cols : 1;
803 n_range_coders =
s->s.h.tiling.tile_cols;
810 for (
i = 0;
i <
s->active_tile_cols;
i++) {
813 rc += n_range_coders;
818 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
819 int valid_ref_frame = 0;
820 for (
i = 0;
i < 3;
i++) {
822 int refw =
ref->width, refh =
ref->height;
826 "Ref pixfmt (%s) did not match current frame (%s)",
830 }
else if (refw ==
w && refh ==
h) {
831 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
835 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
837 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
842 s->mvscale[
i][0] = (refw << 14) /
w;
843 s->mvscale[
i][1] = (refh << 14) /
h;
844 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
845 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
849 if (!valid_ref_frame) {
850 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
855 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
856 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
866 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
873 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
877 if (size2 >
size - (data2 -
data)) {
890 for (
i = 0;
i <
s->active_tile_cols;
i++) {
891 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
892 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
893 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
895 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
897 s->td[
i].nb_block_structure = 0;
903 s->prob.p =
s->prob_ctx[
c].p;
906 if (
s->s.h.lossless) {
910 if (
s->s.h.txfmmode == 3)
914 for (
i = 0;
i < 2;
i++)
917 for (
i = 0;
i < 2;
i++)
918 for (j = 0; j < 2; j++)
920 s->prob.p.tx16p[
i][j] =
922 for (
i = 0;
i < 2;
i++)
923 for (j = 0; j < 3; j++)
925 s->prob.p.tx32p[
i][j] =
931 for (
i = 0;
i < 4;
i++) {
934 for (j = 0; j < 2; j++)
935 for (k = 0; k < 2; k++)
936 for (l = 0; l < 6; l++)
937 for (m = 0; m < 6; m++) {
938 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
940 if (m >= 3 && l == 0)
942 for (n = 0; n < 3; n++) {
951 for (j = 0; j < 2; j++)
952 for (k = 0; k < 2; k++)
953 for (l = 0; l < 6; l++)
954 for (m = 0; m < 6; m++) {
955 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
963 if (
s->s.h.txfmmode ==
i)
968 for (
i = 0;
i < 3;
i++)
971 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
972 for (
i = 0;
i < 7;
i++)
973 for (j = 0; j < 3; j++)
975 s->prob.p.mv_mode[
i][j] =
979 for (
i = 0;
i < 4;
i++)
980 for (j = 0; j < 2; j++)
982 s->prob.p.filter[
i][j] =
985 for (
i = 0;
i < 4;
i++)
989 if (
s->s.h.allowcompinter) {
991 if (
s->s.h.comppredmode)
994 for (
i = 0;
i < 5;
i++)
1003 for (
i = 0;
i < 5;
i++) {
1005 s->prob.p.single_ref[
i][0] =
1008 s->prob.p.single_ref[
i][1] =
1014 for (
i = 0;
i < 5;
i++)
1016 s->prob.p.comp_ref[
i] =
1020 for (
i = 0;
i < 4;
i++)
1021 for (j = 0; j < 9; j++)
1023 s->prob.p.y_mode[
i][j] =
1026 for (
i = 0;
i < 4;
i++)
1027 for (j = 0; j < 4; j++)
1028 for (k = 0; k < 3; k++)
1030 s->prob.p.partition[3 -
i][j][k] =
1032 s->prob.p.partition[3 -
i][j][k]);
1035 for (
i = 0;
i < 3;
i++)
1039 for (
i = 0;
i < 2;
i++) {
1041 s->prob.p.mv_comp[
i].sign =
1044 for (j = 0; j < 10; j++)
1046 s->prob.p.mv_comp[
i].classes[j] =
1050 s->prob.p.mv_comp[
i].class0 =
1053 for (j = 0; j < 10; j++)
1055 s->prob.p.mv_comp[
i].bits[j] =
1059 for (
i = 0;
i < 2;
i++) {
1060 for (j = 0; j < 2; j++)
1061 for (k = 0; k < 3; k++)
1063 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1066 for (j = 0; j < 3; j++)
1068 s->prob.p.mv_comp[
i].fp[j] =
1072 if (
s->s.h.highprecisionmvs) {
1073 for (
i = 0;
i < 2;
i++) {
1075 s->prob.p.mv_comp[
i].class0_hp =
1079 s->prob.p.mv_comp[
i].hp =
1085 return (data2 -
data) + size2;
1089 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1092 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1093 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1095 s->prob.p.partition[bl][
c];
1097 ptrdiff_t hbs = 4 >> bl;
1099 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1100 int bytesperpixel =
s->bytesperpixel;
1105 }
else if (col + hbs < s->cols) {
1106 if (row + hbs < s->rows) {
1114 yoff += hbs * 8 * y_stride;
1115 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1120 yoff += hbs * 8 * bytesperpixel;
1121 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1125 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1127 yoff + 8 * hbs * bytesperpixel,
1128 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1129 yoff += hbs * 8 * y_stride;
1130 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1131 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1141 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1143 yoff + 8 * hbs * bytesperpixel,
1144 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1149 }
else if (row + hbs < s->rows) {
1152 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1153 yoff += hbs * 8 * y_stride;
1154 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1155 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1162 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1164 td->counts.partition[bl][
c][bp]++;
1168 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1172 ptrdiff_t hbs = 4 >> bl;
1174 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1175 int bytesperpixel =
s->bytesperpixel;
1180 }
else if (
td->b->bl == bl) {
1183 yoff += hbs * 8 * y_stride;
1184 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1186 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1187 yoff += hbs * 8 * bytesperpixel;
1188 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1193 if (col + hbs < s->cols) {
1194 if (row + hbs < s->rows) {
1195 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1196 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1197 yoff += hbs * 8 * y_stride;
1198 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1201 yoff + 8 * hbs * bytesperpixel,
1202 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1204 yoff += hbs * 8 * bytesperpixel;
1205 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1208 }
else if (row + hbs < s->rows) {
1209 yoff += hbs * 8 * y_stride;
1210 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1218 int sb_start = ( idx * n) >> log2_n;
1219 int sb_end = ((idx + 1) * n) >> log2_n;
1220 *start =
FFMIN(sb_start, n) << 3;
1229 for (
i = 0;
i <
s->active_tile_cols;
i++)
1238 for (
i = 0;
i < 3;
i++) {
1243 for (
i = 0;
i < 8;
i++) {
1261 int row, col, tile_row, tile_col,
ret;
1263 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1265 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1268 ls_y =
f->linesize[0];
1269 ls_uv =
f->linesize[1];
1270 bytesperpixel =
s->bytesperpixel;
1273 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1275 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1277 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1280 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1281 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1288 if (tile_size >
size) {
1303 for (row = tile_row_start; row < tile_row_end;
1304 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1306 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1308 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1310 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1311 td->tile_col_start = tile_col_start;
1313 memset(
td->left_partition_ctx, 0, 8);
1314 memset(
td->left_skip_ctx, 0, 8);
1315 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1320 memset(
td->left_y_nnz_ctx, 0, 16);
1321 memset(
td->left_uv_nnz_ctx, 0, 32);
1322 memset(
td->left_segpred_ctx, 0, 8);
1324 td->c = &
td->c_b[tile_col];
1327 for (col = tile_col_start;
1329 col += 8, yoff2 += 64 * bytesperpixel,
1330 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1334 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1355 if (row + 8 <
s->rows) {
1356 memcpy(
s->intra_pred_data[0],
1357 f->data[0] + yoff + 63 * ls_y,
1358 8 *
s->cols * bytesperpixel);
1359 memcpy(
s->intra_pred_data[1],
1360 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1361 8 *
s->cols * bytesperpixel >>
s->ss_h);
1362 memcpy(
s->intra_pred_data[2],
1363 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1364 8 *
s->cols * bytesperpixel >>
s->ss_h);
1368 if (
s->s.h.filter.level) {
1371 lflvl_ptr =
s->lflvl;
1372 for (col = 0; col <
s->cols;
1373 col += 8, yoff2 += 64 * bytesperpixel,
1374 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1391 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1396 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1397 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1398 unsigned tile_cols_len;
1399 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1404 ls_y =
f->linesize[0];
1405 ls_uv =
f->linesize[1];
1408 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1409 td->tile_col_start = tile_col_start;
1410 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1411 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1412 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1414 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1416 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1418 td->c = &
td->c_b[tile_row];
1419 for (row = tile_row_start; row < tile_row_end;
1420 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1421 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1422 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1424 memset(
td->left_partition_ctx, 0, 8);
1425 memset(
td->left_skip_ctx, 0, 8);
1426 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1431 memset(
td->left_y_nnz_ctx, 0, 16);
1432 memset(
td->left_uv_nnz_ctx, 0, 32);
1433 memset(
td->left_segpred_ctx, 0, 8);
1435 for (col = tile_col_start;
1437 col += 8, yoff2 += 64 * bytesperpixel,
1438 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1441 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1448 tile_cols_len = tile_col_end - tile_col_start;
1449 if (row + 8 <
s->rows) {
1450 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1451 f->data[0] + yoff + 63 * ls_y,
1452 8 * tile_cols_len * bytesperpixel);
1453 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1454 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1455 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1456 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1457 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1458 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1461 vp9_report_tile_progress(
s, row >> 3, 1);
1471 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1473 int bytesperpixel =
s->bytesperpixel, col,
i;
1477 ls_y =
f->linesize[0];
1478 ls_uv =
f->linesize[1];
1480 for (
i = 0;
i <
s->sb_rows;
i++) {
1481 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1483 if (
s->s.h.filter.level) {
1484 yoff = (ls_y * 64)*
i;
1485 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1486 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1487 for (col = 0; col <
s->cols;
1488 col += 8, yoff += 64 * bytesperpixel,
1489 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1502 unsigned int tile, nb_blocks = 0;
1504 if (
s->s.h.segmentation.enabled) {
1505 for (tile = 0; tile <
s->active_tile_cols; tile++)
1506 nb_blocks +=
s->td[tile].nb_block_structure;
1514 par->
qp =
s->s.h.yac_qi;
1515 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1516 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1517 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1518 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1519 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1522 unsigned int block = 0;
1523 unsigned int tile, block_tile;
1525 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1528 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1530 unsigned int row =
td->block_structure[block_tile].row;
1531 unsigned int col =
td->block_structure[block_tile].col;
1532 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1536 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1537 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1539 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1540 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1541 if (
s->s.h.segmentation.absolute_vals)
1542 b->delta_qp -= par->
qp;
1559 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1564 }
else if (
ret == 0) {
1565 if (!
s->s.refs[
ref].f->buf[0]) {
1578 for (
i = 0;
i < 8;
i++) {
1579 if (
s->next_refs[
i].f->buf[0])
1581 if (
s->s.refs[
i].f->buf[0] &&
1591 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1594 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1600 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1608 f->key_frame =
s->s.h.keyframe;
1618 for (
i = 0;
i < 8;
i++) {
1619 if (
s->next_refs[
i].f->buf[0])
1621 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1623 }
else if (
s->s.refs[
i].f->buf[0]) {
1644 memset(
s->above_partition_ctx, 0,
s->cols);
1645 memset(
s->above_skip_ctx, 0,
s->cols);
1646 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1647 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1651 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1652 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1653 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1654 memset(
s->above_segpred_ctx, 0,
s->cols);
1659 "Failed to allocate block buffers\n");
1662 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1665 for (
i = 0;
i < 4;
i++) {
1666 for (j = 0; j < 2; j++)
1667 for (k = 0; k < 2; k++)
1668 for (l = 0; l < 6; l++)
1669 for (m = 0; m < 6; m++)
1670 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1671 s->prob.coef[
i][j][k][l][m], 3);
1672 if (
s->s.h.txfmmode ==
i)
1675 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1677 }
else if (!
s->s.h.refreshctx) {
1683 for (
i = 0;
i <
s->sb_rows;
i++)
1689 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1690 s->td[
i].b =
s->td[
i].b_base;
1691 s->td[
i].block =
s->td[
i].block_base;
1692 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1693 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1694 s->td[
i].eob =
s->td[
i].eob_base;
1695 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1696 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1697 s->td[
i].error_info = 0;
1702 int tile_row, tile_col;
1706 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1707 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1710 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1711 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1718 if (tile_size >
size)
1743 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1744 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1745 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1747 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1751 }
while (
s->pass++ == 1);
1754 if (
s->td->error_info < 0) {
1756 s->td->error_info = 0;
1767 for (
i = 0;
i < 8;
i++) {
1768 if (
s->s.refs[
i].f->buf[0])
1770 if (
s->next_refs[
i].f->buf[0] &&
1775 if (!
s->s.h.invisible) {
1789 for (
i = 0;
i < 3;
i++)
1791 for (
i = 0;
i < 8;
i++)
1800 for (
i = 0;
i < 3;
i++) {
1802 if (!
s->s.frames[
i].tf.f) {
1808 for (
i = 0;
i < 8;
i++) {
1811 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f) {
1826 s->s.h.filter.sharpness = -1;
1837 for (
i = 0;
i < 3;
i++) {
1838 if (
s->s.frames[
i].tf.f->buf[0])
1840 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1845 for (
i = 0;
i < 8;
i++) {
1846 if (
s->s.refs[
i].f->buf[0])
1848 if (ssrc->next_refs[
i].f->buf[0]) {
1854 s->s.h.invisible = ssrc->s.h.invisible;
1855 s->s.h.keyframe = ssrc->s.h.keyframe;
1856 s->s.h.intraonly = ssrc->s.h.intraonly;
1857 s->ss_v = ssrc->ss_v;
1858 s->ss_h = ssrc->ss_h;
1859 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1860 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1861 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1862 s->bytesperpixel = ssrc->bytesperpixel;
1863 s->gf_fmt = ssrc->gf_fmt;
1866 s->s.h.bpp = ssrc->s.h.bpp;
1867 s->bpp_index = ssrc->bpp_index;
1868 s->pix_fmt = ssrc->pix_fmt;
1869 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1870 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1871 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1872 sizeof(
s->s.h.segmentation.feat));
1893 .bsfs =
"vp9_superframe_split",
1895 #if CONFIG_VP9_DXVA2_HWACCEL
1898 #if CONFIG_VP9_D3D11VA_HWACCEL
1901 #if CONFIG_VP9_D3D11VA2_HWACCEL
1904 #if CONFIG_VP9_NVDEC_HWACCEL
1907 #if CONFIG_VP9_VAAPI_HWACCEL
1910 #if CONFIG_VP9_VDPAU_HWACCEL