24 #include "config_components.h"
/* 3-byte sync code that marks the start of a VP9 uncompressed frame header. */
48 #define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
105 f->segmentation_map =
NULL;
117 sz = 64 *
s->sb_cols *
s->sb_rows;
118 if (sz !=
s->frame_extradata_pool_size) {
121 if (!
s->frame_extradata_pool) {
122 s->frame_extradata_pool_size = 0;
126 s->frame_extradata_pool_size = sz;
133 memset(
f->extradata->data, 0,
f->extradata->size);
135 f->segmentation_map =
f->extradata->data;
166 src->hwaccel_picture_private);
/* Compile-time upper bound on how many hwaccel pixel formats this build can
 * offer when negotiating the output format: one slot per enabled hwaccel,
 * except D3D11VA which contributes two entries (hence the * 2 factor —
 * presumably two distinct pixel formats; confirm against the format list). */
177 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
178                      CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
179                      CONFIG_VP9_NVDEC_HWACCEL + \
180                      CONFIG_VP9_VAAPI_HWACCEL + \
181                      CONFIG_VP9_VDPAU_HWACCEL + \
182                      CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
186 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
191 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
195 switch (
s->pix_fmt) {
198 #if CONFIG_VP9_DXVA2_HWACCEL
201 #if CONFIG_VP9_D3D11VA_HWACCEL
205 #if CONFIG_VP9_NVDEC_HWACCEL
208 #if CONFIG_VP9_VAAPI_HWACCEL
211 #if CONFIG_VP9_VDPAU_HWACCEL
214 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
219 #if CONFIG_VP9_NVDEC_HWACCEL
222 #if CONFIG_VP9_VAAPI_HWACCEL
225 #if CONFIG_VP9_VDPAU_HWACCEL
232 #if CONFIG_VP9_VAAPI_HWACCEL
239 #if CONFIG_VP9_VAAPI_HWACCEL
245 *fmtp++ =
s->pix_fmt;
253 s->gf_fmt =
s->pix_fmt;
261 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
264 s->last_fmt =
s->pix_fmt;
265 s->sb_cols = (
w + 63) >> 6;
266 s->sb_rows = (
h + 63) >> 6;
267 s->cols = (
w + 7) >> 3;
268 s->rows = (
h + 7) >> 3;
/* Carve the next sub-buffer out of the single allocation tracked by the
 * local cursor 'p': point 'var' (cast to 'type') at the current position,
 * then advance 'p' by s->sb_cols * (n) elements of *var.
 * NOTE(review): expands to TWO statements with no do { } while (0) wrapper,
 * so it must never be used as the unbraced body of an if/for — verify all
 * call sites keep it as a plain statement. */
271 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
275 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
276 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
279 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
280 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
281 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
282 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
283 assign(
s->above_mode_ctx, uint8_t *, 16);
285 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
286 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
287 assign(
s->above_partition_ctx, uint8_t *, 8);
288 assign(
s->above_skip_ctx, uint8_t *, 8);
289 assign(
s->above_txfm_ctx, uint8_t *, 8);
290 assign(
s->above_segpred_ctx, uint8_t *, 8);
291 assign(
s->above_intra_ctx, uint8_t *, 8);
292 assign(
s->above_comp_ctx, uint8_t *, 8);
293 assign(
s->above_ref_ctx, uint8_t *, 8);
294 assign(
s->above_filter_ctx, uint8_t *, 8);
299 for (
i = 0;
i <
s->active_tile_cols;
i++)
303 if (
s->s.h.bpp !=
s->last_bpp) {
306 s->last_bpp =
s->s.h.bpp;
316 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
319 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
323 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
324 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
326 int sbs =
s->sb_cols *
s->sb_rows;
329 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
330 16 * 16 + 2 * chroma_eobs) * sbs);
331 if (!
td->b_base || !
td->block_base)
333 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
334 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
335 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
336 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
337 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
341 if (!
td->block_structure)
345 for (
i = 1;
i <
s->active_tile_cols;
i++)
348 for (
i = 0;
i <
s->active_tile_cols;
i++) {
350 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
351 16 * 16 + 2 * chroma_eobs);
352 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
354 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
355 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
356 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
357 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
358 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
362 if (!
s->td[
i].block_structure)
367 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
384 return m - ((v + 1) >> 1);
391 static const uint8_t inv_map_table[255] = {
392 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
393 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
394 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
395 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
396 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
397 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
398 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
399 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
400 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
401 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
402 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
403 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
404 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
405 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
406 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
407 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
408 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
409 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
457 s->s.h.bpp = 8 +
bits * 2;
458 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
464 s->ss_h =
s->ss_v = 0;
478 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
490 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
501 s->ss_h =
s->ss_v = 1;
502 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
513 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
515 const uint8_t *data2;
539 s->last_keyframe =
s->s.h.keyframe;
542 last_invisible =
s->s.h.invisible;
545 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
547 if (
s->s.h.keyframe) {
555 s->s.h.refreshrefmask = 0xff;
561 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
562 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
563 if (
s->s.h.intraonly) {
572 s->ss_h =
s->ss_v = 1;
575 s->bytesperpixel = 1;
588 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
590 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
592 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
593 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
594 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
595 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
600 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
601 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
603 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
604 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
606 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
607 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
615 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
622 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
623 s->s.h.signbias[0] !=
s->s.h.signbias[2];
624 if (
s->s.h.allowcompinter) {
625 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
626 s->s.h.fixcompref = 2;
627 s->s.h.varcompref[0] = 0;
628 s->s.h.varcompref[1] = 1;
629 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
630 s->s.h.fixcompref = 1;
631 s->s.h.varcompref[0] = 0;
632 s->s.h.varcompref[1] = 2;
634 s->s.h.fixcompref = 0;
635 s->s.h.varcompref[0] = 1;
636 s->s.h.varcompref[1] = 2;
641 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
642 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
644 if (
s->s.h.keyframe ||
s->s.h.intraonly)
645 s->s.h.framectxid = 0;
648 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
650 s->s.h.lf_delta.ref[0] = 1;
651 s->s.h.lf_delta.ref[1] = 0;
652 s->s.h.lf_delta.ref[2] = -1;
653 s->s.h.lf_delta.ref[3] = -1;
654 s->s.h.lf_delta.mode[0] = 0;
655 s->s.h.lf_delta.mode[1] = 0;
656 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
662 if (
s->s.h.filter.sharpness != sharp) {
663 for (
i = 1;
i <= 63;
i++) {
667 limit >>= (sharp + 3) >> 2;
672 s->filter_lut.lim_lut[
i] =
limit;
673 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
676 s->s.h.filter.sharpness = sharp;
677 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
678 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
679 for (
i = 0;
i < 4;
i++)
682 for (
i = 0;
i < 2;
i++)
693 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
694 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
699 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
700 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
701 for (
i = 0;
i < 7;
i++)
704 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
705 for (
i = 0;
i < 3;
i++)
711 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
712 for (
i = 0;
i < 8;
i++) {
713 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
715 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
717 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
718 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
719 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
725 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
726 int qyac, qydc, quvac, quvdc, lflvl, sh;
728 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
729 if (
s->s.h.segmentation.absolute_vals)
734 qyac =
s->s.h.yac_qi;
746 sh =
s->s.h.filter.level >= 32;
747 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
748 if (
s->s.h.segmentation.absolute_vals)
751 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
753 lflvl =
s->s.h.filter.level;
755 if (
s->s.h.lf_delta.enabled) {
756 s->s.h.segmentation.feat[
i].lflvl[0][0] =
757 s->s.h.segmentation.feat[
i].lflvl[0][1] =
759 for (j = 1; j < 4; j++) {
760 s->s.h.segmentation.feat[
i].lflvl[j][0] =
762 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
763 s->s.h.segmentation.feat[
i].lflvl[j][1] =
765 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
768 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
769 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
779 for (
s->s.h.tiling.log2_tile_cols = 0;
780 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
781 s->s.h.tiling.log2_tile_cols++) ;
782 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
784 while (
max >
s->s.h.tiling.log2_tile_cols) {
786 s->s.h.tiling.log2_tile_cols++;
791 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
792 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
797 for (
i = 0;
i <
s->active_tile_cols;
i++)
802 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
804 s->s.h.tiling.tile_cols : 1;
809 n_range_coders =
s->s.h.tiling.tile_cols;
816 for (
i = 0;
i <
s->active_tile_cols;
i++) {
819 rc += n_range_coders;
824 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
825 int valid_ref_frame = 0;
826 for (
i = 0;
i < 3;
i++) {
828 int refw =
ref->width, refh =
ref->height;
832 "Ref pixfmt (%s) did not match current frame (%s)",
836 }
else if (refw ==
w && refh ==
h) {
837 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
841 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
843 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
848 s->mvscale[
i][0] = (refw << 14) /
w;
849 s->mvscale[
i][1] = (refh << 14) /
h;
850 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
851 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
855 if (!valid_ref_frame) {
856 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
861 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
862 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
872 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
879 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
883 if (size2 >
size - (data2 -
data)) {
896 for (
i = 0;
i <
s->active_tile_cols;
i++) {
897 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
898 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
899 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
901 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
903 s->td[
i].nb_block_structure = 0;
909 s->prob.p =
s->prob_ctx[
c].p;
912 if (
s->s.h.lossless) {
916 if (
s->s.h.txfmmode == 3)
920 for (
i = 0;
i < 2;
i++)
923 for (
i = 0;
i < 2;
i++)
924 for (j = 0; j < 2; j++)
926 s->prob.p.tx16p[
i][j] =
928 for (
i = 0;
i < 2;
i++)
929 for (j = 0; j < 3; j++)
931 s->prob.p.tx32p[
i][j] =
937 for (
i = 0;
i < 4;
i++) {
938 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
940 for (j = 0; j < 2; j++)
941 for (k = 0; k < 2; k++)
942 for (l = 0; l < 6; l++)
943 for (m = 0; m < 6; m++) {
944 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
945 uint8_t *
r =
ref[j][k][l][m];
946 if (m >= 3 && l == 0)
948 for (n = 0; n < 3; n++) {
957 for (j = 0; j < 2; j++)
958 for (k = 0; k < 2; k++)
959 for (l = 0; l < 6; l++)
960 for (m = 0; m < 6; m++) {
961 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
962 uint8_t *
r =
ref[j][k][l][m];
969 if (
s->s.h.txfmmode ==
i)
974 for (
i = 0;
i < 3;
i++)
977 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
978 for (
i = 0;
i < 7;
i++)
979 for (j = 0; j < 3; j++)
981 s->prob.p.mv_mode[
i][j] =
985 for (
i = 0;
i < 4;
i++)
986 for (j = 0; j < 2; j++)
988 s->prob.p.filter[
i][j] =
991 for (
i = 0;
i < 4;
i++)
995 if (
s->s.h.allowcompinter) {
997 if (
s->s.h.comppredmode)
1000 for (
i = 0;
i < 5;
i++)
1009 for (
i = 0;
i < 5;
i++) {
1011 s->prob.p.single_ref[
i][0] =
1014 s->prob.p.single_ref[
i][1] =
1020 for (
i = 0;
i < 5;
i++)
1022 s->prob.p.comp_ref[
i] =
1026 for (
i = 0;
i < 4;
i++)
1027 for (j = 0; j < 9; j++)
1029 s->prob.p.y_mode[
i][j] =
1032 for (
i = 0;
i < 4;
i++)
1033 for (j = 0; j < 4; j++)
1034 for (k = 0; k < 3; k++)
1036 s->prob.p.partition[3 -
i][j][k] =
1038 s->prob.p.partition[3 -
i][j][k]);
1041 for (
i = 0;
i < 3;
i++)
1045 for (
i = 0;
i < 2;
i++) {
1047 s->prob.p.mv_comp[
i].sign =
1050 for (j = 0; j < 10; j++)
1052 s->prob.p.mv_comp[
i].classes[j] =
1056 s->prob.p.mv_comp[
i].class0 =
1059 for (j = 0; j < 10; j++)
1061 s->prob.p.mv_comp[
i].bits[j] =
1065 for (
i = 0;
i < 2;
i++) {
1066 for (j = 0; j < 2; j++)
1067 for (k = 0; k < 3; k++)
1069 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1072 for (j = 0; j < 3; j++)
1074 s->prob.p.mv_comp[
i].fp[j] =
1078 if (
s->s.h.highprecisionmvs) {
1079 for (
i = 0;
i < 2;
i++) {
1081 s->prob.p.mv_comp[
i].class0_hp =
1085 s->prob.p.mv_comp[
i].hp =
1091 return (data2 -
data) + size2;
1095 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1098 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1099 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1101 s->prob.p.partition[bl][
c];
1103 ptrdiff_t hbs = 4 >> bl;
1105 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1106 int bytesperpixel =
s->bytesperpixel;
1111 }
else if (col + hbs < s->cols) {
1112 if (row + hbs < s->rows) {
1120 yoff += hbs * 8 * y_stride;
1121 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1126 yoff += hbs * 8 * bytesperpixel;
1127 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1131 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1135 yoff += hbs * 8 * y_stride;
1136 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1137 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1139 yoff + 8 * hbs * bytesperpixel,
1140 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1147 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1149 yoff + 8 * hbs * bytesperpixel,
1150 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1155 }
else if (row + hbs < s->rows) {
1158 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1159 yoff += hbs * 8 * y_stride;
1160 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1161 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1168 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1170 td->counts.partition[bl][
c][bp]++;
1174 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1178 ptrdiff_t hbs = 4 >> bl;
1180 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1181 int bytesperpixel =
s->bytesperpixel;
1186 }
else if (
td->b->bl == bl) {
1189 yoff += hbs * 8 * y_stride;
1190 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1192 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1193 yoff += hbs * 8 * bytesperpixel;
1194 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1199 if (col + hbs < s->cols) {
1200 if (row + hbs < s->rows) {
1201 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1202 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1203 yoff += hbs * 8 * y_stride;
1204 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1207 yoff + 8 * hbs * bytesperpixel,
1208 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1210 yoff += hbs * 8 * bytesperpixel;
1211 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1214 }
else if (row + hbs < s->rows) {
1215 yoff += hbs * 8 * y_stride;
1216 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1224 int sb_start = ( idx * n) >> log2_n;
1225 int sb_end = ((idx + 1) * n) >> log2_n;
1226 *start =
FFMIN(sb_start, n) << 3;
1227 *end =
FFMIN(sb_end, n) << 3;
1235 for (
i = 0;
i <
s->active_tile_cols;
i++)
1244 for (
i = 0;
i < 3;
i++) {
1249 for (
i = 0;
i < 8;
i++) {
1270 int row, col, tile_row, tile_col,
ret;
1272 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1274 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1277 ls_y =
f->linesize[0];
1278 ls_uv =
f->linesize[1];
1279 bytesperpixel =
s->bytesperpixel;
1282 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1284 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1286 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1289 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1290 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1297 if (tile_size >
size)
1308 for (row = tile_row_start; row < tile_row_end;
1309 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1311 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1313 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1315 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1316 td->tile_col_start = tile_col_start;
1318 memset(
td->left_partition_ctx, 0, 8);
1319 memset(
td->left_skip_ctx, 0, 8);
1320 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1325 memset(
td->left_y_nnz_ctx, 0, 16);
1326 memset(
td->left_uv_nnz_ctx, 0, 32);
1327 memset(
td->left_segpred_ctx, 0, 8);
1329 td->c = &
td->c_b[tile_col];
1332 for (col = tile_col_start;
1334 col += 8, yoff2 += 64 * bytesperpixel,
1335 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1339 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1360 if (row + 8 <
s->rows) {
1361 memcpy(
s->intra_pred_data[0],
1362 f->data[0] + yoff + 63 * ls_y,
1363 8 *
s->cols * bytesperpixel);
1364 memcpy(
s->intra_pred_data[1],
1365 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1366 8 *
s->cols * bytesperpixel >>
s->ss_h);
1367 memcpy(
s->intra_pred_data[2],
1368 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1369 8 *
s->cols * bytesperpixel >>
s->ss_h);
1373 if (
s->s.h.filter.level) {
1376 lflvl_ptr =
s->lflvl;
1377 for (col = 0; col <
s->cols;
1378 col += 8, yoff2 += 64 * bytesperpixel,
1379 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1396 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1401 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1402 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1403 unsigned tile_cols_len;
1404 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1409 ls_y =
f->linesize[0];
1410 ls_uv =
f->linesize[1];
1413 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1414 td->tile_col_start = tile_col_start;
1415 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1416 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1417 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1419 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1421 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1423 td->c = &
td->c_b[tile_row];
1424 for (row = tile_row_start; row < tile_row_end;
1425 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1426 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1427 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1429 memset(
td->left_partition_ctx, 0, 8);
1430 memset(
td->left_skip_ctx, 0, 8);
1431 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1436 memset(
td->left_y_nnz_ctx, 0, 16);
1437 memset(
td->left_uv_nnz_ctx, 0, 32);
1438 memset(
td->left_segpred_ctx, 0, 8);
1440 for (col = tile_col_start;
1442 col += 8, yoff2 += 64 * bytesperpixel,
1443 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1446 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1453 tile_cols_len = tile_col_end - tile_col_start;
1454 if (row + 8 <
s->rows) {
1455 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1456 f->data[0] + yoff + 63 * ls_y,
1457 8 * tile_cols_len * bytesperpixel);
1458 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1459 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1460 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1461 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1462 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1463 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1466 vp9_report_tile_progress(
s, row >> 3, 1);
1476 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1478 int bytesperpixel =
s->bytesperpixel, col,
i;
1482 ls_y =
f->linesize[0];
1483 ls_uv =
f->linesize[1];
1485 for (
i = 0;
i <
s->sb_rows;
i++) {
1486 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1488 if (
s->s.h.filter.level) {
1489 yoff = (ls_y * 64)*
i;
1490 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1491 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1492 for (col = 0; col <
s->cols;
1493 col += 8, yoff += 64 * bytesperpixel,
1494 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1507 unsigned int tile, nb_blocks = 0;
1509 if (
s->s.h.segmentation.enabled) {
1510 for (tile = 0; tile <
s->active_tile_cols; tile++)
1511 nb_blocks +=
s->td[tile].nb_block_structure;
1519 par->
qp =
s->s.h.yac_qi;
1520 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1521 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1522 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1523 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1524 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1527 unsigned int block = 0;
1528 unsigned int tile, block_tile;
1530 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1533 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1535 unsigned int row =
td->block_structure[block_tile].row;
1536 unsigned int col =
td->block_structure[block_tile].col;
1537 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1541 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1542 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1544 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1545 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1546 if (
s->s.h.segmentation.absolute_vals)
1547 b->delta_qp -= par->
qp;
1564 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1569 }
else if (
ret == 0) {
1570 if (!
s->s.refs[
ref].f->buf[0]) {
1578 for (
i = 0;
i < 8;
i++) {
1579 if (
s->next_refs[
i].f->buf[0])
1581 if (
s->s.refs[
i].f->buf[0] &&
1591 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1594 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1600 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1608 if (
s->s.h.keyframe)
1621 for (
i = 0;
i < 8;
i++) {
1622 if (
s->next_refs[
i].f->buf[0])
1624 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1626 }
else if (
s->s.refs[
i].f->buf[0]) {
1648 memset(
s->above_partition_ctx, 0,
s->cols);
1649 memset(
s->above_skip_ctx, 0,
s->cols);
1650 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1651 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1655 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1656 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1657 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1658 memset(
s->above_segpred_ctx, 0,
s->cols);
1663 "Failed to allocate block buffers\n");
1666 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1669 for (
i = 0;
i < 4;
i++) {
1670 for (j = 0; j < 2; j++)
1671 for (k = 0; k < 2; k++)
1672 for (l = 0; l < 6; l++)
1673 for (m = 0; m < 6; m++)
1674 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1675 s->prob.coef[
i][j][k][l][m], 3);
1676 if (
s->s.h.txfmmode ==
i)
1679 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1681 }
else if (!
s->s.h.refreshctx) {
1687 for (
i = 0;
i <
s->sb_rows;
i++)
1693 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1694 s->td[
i].b =
s->td[
i].b_base;
1695 s->td[
i].block =
s->td[
i].block_base;
1696 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1697 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1698 s->td[
i].eob =
s->td[
i].eob_base;
1699 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1700 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1701 s->td[
i].error_info = 0;
1706 int tile_row, tile_col;
1710 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1711 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1714 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1715 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1722 if (tile_size >
size)
1747 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1748 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1749 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1751 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1755 }
while (
s->pass++ == 1);
1758 if (
s->td->error_info < 0) {
1760 s->td->error_info = 0;
1771 for (
i = 0;
i < 8;
i++) {
1772 if (
s->s.refs[
i].f->buf[0])
1774 if (
s->next_refs[
i].f->buf[0] &&
1779 if (!
s->s.h.invisible) {
1793 for (
i = 0;
i < 3;
i++)
1795 for (
i = 0;
i < 8;
i++)
1808 s->s.h.filter.sharpness = -1;
1818 for (
int i = 0;
i < 3;
i++) {
1820 if (!
s->s.frames[
i].tf.f)
1823 for (
int i = 0;
i < 8;
i++) {
1826 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1838 for (
i = 0;
i < 3;
i++) {
1839 if (
s->s.frames[
i].tf.f->buf[0])
1841 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1846 for (
i = 0;
i < 8;
i++) {
1847 if (
s->s.refs[
i].f->buf[0])
1849 if (ssrc->next_refs[
i].f->buf[0]) {
1855 s->s.h.invisible = ssrc->s.h.invisible;
1856 s->s.h.keyframe = ssrc->s.h.keyframe;
1857 s->s.h.intraonly = ssrc->s.h.intraonly;
1858 s->ss_v = ssrc->ss_v;
1859 s->ss_h = ssrc->ss_h;
1860 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1861 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1862 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1863 s->bytesperpixel = ssrc->bytesperpixel;
1864 s->gf_fmt = ssrc->gf_fmt;
1867 s->s.h.bpp = ssrc->s.h.bpp;
1868 s->bpp_index = ssrc->bpp_index;
1869 s->pix_fmt = ssrc->pix_fmt;
1870 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1871 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1872 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1873 sizeof(
s->s.h.segmentation.feat));
1895 .bsfs =
"vp9_superframe_split",
1897 #if CONFIG_VP9_DXVA2_HWACCEL
1900 #if CONFIG_VP9_D3D11VA_HWACCEL
1903 #if CONFIG_VP9_D3D11VA2_HWACCEL
1906 #if CONFIG_VP9_NVDEC_HWACCEL
1909 #if CONFIG_VP9_VAAPI_HWACCEL
1912 #if CONFIG_VP9_VDPAU_HWACCEL
1915 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL