37 #define VP9_SYNCCODE 0x498342
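/* 24-bit sync code expected at the start of keyframe and intra-only frame headers. */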
120 unsigned coef[4][2][2][6][6][3];
121 unsigned eob[4][2][2][6][6][2];
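/* Entropy-adaptation counters for the coefficient coder, indexed by transform
 * size, plane type (luma/chroma), intra/inter, coefficient band and context;
 * the last dimension holds the branch counts (3 for coefficients, 2 for eob). */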
169 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
170 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
172 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
173 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
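/* Block width/height per VP9 block size, 64x64 down to 4x4: the first table is
 * in 4-pixel units, the second in 8-pixel units clamped to a minimum of 1. */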
249 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
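/* Worst-case number of hwaccel pixel formats appended to the format list below. */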
257 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
262 #if CONFIG_VP9_DXVA2_HWACCEL
265 #if CONFIG_VP9_D3D11VA_HWACCEL
268 #if CONFIG_VP9_VAAPI_HWACCEL
273 *fmtp++ = s->pix_fmt;
292 s->last_fmt = s->pix_fmt;
295 s->cols = (w + 7) >> 3;
296 s->rows = (h + 7) >> 3;
298 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
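/* Carves typed sub-arrays out of one flat allocation, advancing p past
 * s->sb_cols * n entries each time. */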
341 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
348 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
349 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
355 16 * 16 + 2 * chroma_eobs) * sbs);
366 16 * 16 + 2 * chroma_eobs);
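/* Both allocations budget 64*64 luma coefficients plus two subsampled chroma
 * blocks per superblock, and one eob entry per 4x4 sub-block. */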
389 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
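/* Inverse recentering for probability updates: small codes alternate below and
 * above m (m-1, m+1, m-2, ...), while codes beyond 2*m pass through unchanged. */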
395 static const int inv_map_table[255] = {
396 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
397 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
398 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
399 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
400 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
401 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
402 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
403 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
404 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
405 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
406 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
407 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
408 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
409 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
410 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
411 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
412 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
413 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
461 s->bpp = 8 + bits * 2;
470 s->pix_fmt = pix_fmt_rgb[bits];
482 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
506 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
517 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
668 for (i = 0; i < 4; i++)
671 for (i = 0; i < 2; i++)
690 for (i = 0; i < 7; i++)
694 for (i = 0; i < 3; i++)
702 for (i = 0; i < 8; i++) {
716 int qyac, qydc, quvac, quvdc, lflvl, sh;
729 qyac = av_clip_uintp2(qyac, 8);
748 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
749 for (j = 1; j < 4; j++) {
772 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
773 max = FFMAX(0, max - 1);
794 for (i = 0; i < 3; i++) {
800 "Ref pixfmt (%s) did not match current frame (%s)",
804 } else if (refw == w && refh == h) {
807 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
809 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
813 s->mvscale[i][0] = (refw << 14) / w;
814 s->mvscale[i][1] = (refh << 14) / h;
843 if (size2 > size - (data2 - data)) {
873 for (i = 0; i < 2; i++)
876 for (i = 0; i < 2; i++)
877 for (j = 0; j < 2; j++)
881 for (i = 0; i < 2; i++)
882 for (j = 0; j < 3; j++)
890 for (i = 0; i < 4; i++) {
893 for (j = 0; j < 2; j++)
894 for (k = 0; k < 2; k++)
895 for (l = 0; l < 6; l++)
896 for (m = 0; m < 6; m++) {
899 if (m >= 3 && l == 0)
901 for (n = 0; n < 3; n++) {
911 for (j = 0; j < 2; j++)
912 for (k = 0; k < 2; k++)
913 for (l = 0; l < 6; l++)
914 for (m = 0; m < 6; m++) {
928 for (i = 0; i < 3; i++)
932 for (i = 0; i < 7; i++)
933 for (j = 0; j < 3; j++)
939 for (i = 0; i < 4; i++)
940 for (j = 0; j < 2; j++)
945 for (i = 0; i < 4; i++)
954 for (i = 0; i < 5; i++)
963 for (i = 0; i < 5; i++) {
974 for (i = 0; i < 5; i++)
980 for (i = 0; i < 4; i++)
981 for (j = 0; j < 9; j++)
986 for (i = 0; i < 4; i++)
987 for (j = 0; j < 4; j++)
988 for (k = 0; k < 3; k++)
994 for (i = 0; i < 3; i++)
998 for (i = 0; i < 2; i++) {
1002 for (j = 0; j < 10; j++)
1010 for (j = 0; j < 10; j++)
1016 for (i = 0; i < 2; i++) {
1017 for (j = 0; j < 2; j++)
1018 for (k = 0; k < 3; k++)
1023 for (j = 0; j < 3; j++)
1030 for (i = 0; i < 2; i++) {
1042 return (data2 - data) + size2;
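/* Offset of the first tile: the uncompressed header (data2 - data) plus the
 * size2-byte compressed header that was just parsed. */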
1053 VP56mv *pmv, int ref, int z, int idx, int sb)
1055 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1056 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1057 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1058 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1059 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1060 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1061 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1062 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1063 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1064 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1065 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1066 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1067 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1068 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1069 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1070 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1071 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1072 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1073 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1074 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1075 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1076 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1077 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1078 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1079 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1080 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1081 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1084 int row = s->row, col = s->col, row7 = s->row7;
1085 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1086 #define INVALID_MV 0x80008000U
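/* Sentinel packing 0x8000 into both 16-bit MV components, a value no valid
 * motion vector can take. */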
1090 #define RETURN_DIRECT_MV(mv) \
1092 uint32_t m = AV_RN32A(&mv); \
1096 } else if (mem == INVALID_MV) { \
1098 } else if (m != mem) { \
1105 if (sb == 2 || sb == 1) {
1107 } else if (sb == 3) {
1113 #define RETURN_MV(mv) \
1118 av_assert2(idx == 1); \
1119 av_assert2(mem != INVALID_MV); \
1120 if (mem_sub8x8 == INVALID_MV) { \
1121 clamp_mv(&tmp, &mv, s); \
1122 m = AV_RN32A(&tmp); \
1127 mem_sub8x8 = AV_RN32A(&mv); \
1128 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1129 clamp_mv(&tmp, &mv, s); \
1130 m = AV_RN32A(&tmp); \
1140 uint32_t m = AV_RN32A(&mv); \
1142 clamp_mv(pmv, &mv, s); \
1144 } else if (mem == INVALID_MV) { \
1146 } else if (m != mem) { \
1147 clamp_mv(pmv, &mv, s); \
1155 if (mv->ref[0] == ref) {
1157 } else if (mv->ref[1] == ref) {
1163 if (mv->ref[0] == ref) {
1165 } else if (mv->ref[1] == ref) {
1175 for (; i < 8; i++) {
1176 int c = p[i][0] + col, r = p[i][1] + row;
1178 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1181 if (mv->ref[0] == ref) {
1183 } else if (mv->ref[1] == ref) {
1195 if (mv->ref[0] == ref) {
1197 } else if (mv->ref[1] == ref) {
1202 #define RETURN_SCALE_MV(mv, scale) \
1205 VP56mv mv_temp = { -mv.x, -mv.y }; \
1206 RETURN_MV(mv_temp); \
1213 for (i = 0; i < 8; i++) {
1214 int c = p[i][0] + col, r = p[i][1] + row;
1216 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1219 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1222 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1236 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1239 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1251 #undef RETURN_SCALE_MV
1265 for (n = 0, m = 0; m < c; m++) {
1291 n = (n << 3) | (bit << 1);
1304 return sign ? -(n + 1) : (n + 1);
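/* The decoded magnitude n is one less than the actual component value, so add
 * the 1 back and apply the sign bit. */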
1319 mode == NEWMV ? -1 : sb);
1321 if ((mode == NEWMV || sb == -1) &&
1336 if (mode == NEWMV) {
1350 mode == NEWMV ? -1 : sb);
1351 if ((mode == NEWMV || sb == -1) &&
1366 if (mode == NEWMV) {
1391 int v16 = v * 0x0101;
1399 uint32_t v32 = v * 0x01010101;
1408 uint64_t v64 = v * 0x0101010101010101ULL;
1414 uint32_t v32 = v * 0x01010101;
1429 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1432 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1440 int row = s->row, col = s->col, row7 = s->row7;
1441 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1445 int vref, filter_id;
1463 for (y = 0; y < h4; y++) {
1464 int idx_base = (y + row) * 8 * s->sb_cols + col;
1465 for (x = 0; x < w4; x++)
1466 pred = FFMIN(pred, refsegmap[idx_base + x]);
1504 if (have_a && have_l) {
1528 } else if (have_l) {
1576 l[0] = a[1] = b->mode[1];
1578 l[0] = a[1] = b->mode[1] = b->mode[0];
1586 l[1] = a[1] = b->mode[3];
1588 l[1] = a[1] = b->mode[3] = b->mode[2];
1592 l[1] = a[1] = b->mode[3] = b->mode[1];
1604 } else if (b->intra) {
1633 static const uint8_t size_group[10] = {
1634 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1636 int sz = size_group[b->bs];
1647 static const uint8_t inter_mode_ctx_lut[14][14] = {
1648 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1649 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1650 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1651 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1652 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1653 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1654 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1655 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1656 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1657 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1658 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1659 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1660 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1661 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1696 } else if (have_l) {
1734 c = (refa == refl) ? 3 : 1;
1751 c = (refl == refa) ? 4 : 2;
1763 } else if (have_l) {
1889 } else if (have_l) {
1903 b->ref[0] = 1 + bit;
1912 static const uint8_t off[10] = {
1913 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
2009 #define SPLAT_CTX(var, val, n) \
2011 case 1: var = val; break; \
2012 case 2: AV_WN16A(&var, val * 0x0101); break; \
2013 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2014 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2016 uint64_t v64 = val * 0x0101010101010101ULL; \
2017 AV_WN64A( &var, v64); \
2018 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2023 #define SPLAT_CTX(var, val, n) \
2025 case 1: var = val; break; \
2026 case 2: AV_WN16A(&var, val * 0x0101); break; \
2027 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2029 uint32_t v32 = val * 0x01010101; \
2030 AV_WN32A( &var, v32); \
2031 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2035 uint32_t v32 = val * 0x01010101; \
2036 AV_WN32A( &var, v32); \
2037 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2038 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2039 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2046 #define SET_CTXS(dir, off, n) \
2048 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2049 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2050 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2051 if (!s->s.h.keyframe && !s->s.h.intraonly) { \
2052 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2053 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2054 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2056 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2057 if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
2058 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2063 case 1: SET_CTXS(above, col, 1); break;
2064 case 2: SET_CTXS(above, col, 2); break;
2065 case 4: SET_CTXS(above, col, 4); break;
2066 case 8: SET_CTXS(above, col, 8); break;
2069 case 1: SET_CTXS(left, row7, 1); break;
2070 case 2: SET_CTXS(left, row7, 2); break;
2071 case 4: SET_CTXS(left, row7, 4); break;
2072 case 8: SET_CTXS(left, row7, 8); break;
2092 for (n = 0; n < w4 * 2; n++) {
2096 for (n = 0; n < h4 * 2; n++) {
2104 for (y = 0; y < h4; y++) {
2105 int x, o = (row + y) * s->sb_cols * 8 + col;
2109 for (x = 0; x < w4; x++) {
2113 } else if (b->comp) {
2114 for (x = 0; x < w4; x++) {
2115 mv[x].ref[0] = b->ref[0];
2116 mv[x].ref[1] = b->ref[1];
2121 for (x = 0; x < w4; x++) {
2122 mv[x].ref[0] = b->ref[0];
2133 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2134 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2135 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2136 const int16_t *band_counts, const int16_t *qmul)
2138 int i = 0, band = 0, band_left = band_counts[band];
2152 cnt[band][nnz][0]++;
2154 band_left = band_counts[++band];
2156 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2158 if (++i == n_coeffs)
2165 cnt[band][nnz][1]++;
2173 cnt[band][nnz][2]++;
2176 cache[rc] = val = 2;
2210 if (!is8bitsperpixel) {
2235 #define STORE_COEF(c, i, v) do { \
2236 if (is8bitsperpixel) { \
2239 AV_WN32A(&c[i * 2], v); \
2243 band_left = band_counts[++band];
2248 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2250 } while (++i < n_coeffs);
2256 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2257 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2258 const int16_t (*nb)[2], const int16_t *band_counts,
2259 const int16_t *qmul)
2262 nnz, scan, nb, band_counts, qmul);
2266 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2267 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2268 const int16_t (*nb)[2], const int16_t *band_counts,
2269 const int16_t *qmul)
2272 nnz, scan, nb, band_counts, qmul);
2276 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2277 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2278 const int16_t (*nb)[2], const int16_t *band_counts,
2279 const int16_t *qmul)
2282 nnz, scan, nb, band_counts, qmul);
2286 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2287 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2288 const int16_t (*nb)[2], const int16_t *band_counts,
2289 const int16_t *qmul)
2292 nnz, scan, nb, band_counts, qmul);
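/* The four thin wrappers above let the compiler specialize decode_coeffs_b()
 * for each combination of bit depth (8/16 bpp) and 32x32 transforms. */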
2299 int row = s->row, col = s->col;
2304 int end_x = FFMIN(2 * (s->cols - col), w4);
2305 int end_y = FFMIN(2 * (s->rows - row), h4);
2306 int n, pl, x, y, res;
2309 const int16_t *const *yscans = vp9_scans[tx];
2315 static const int16_t band_counts[4][8] = {
2316 { 1, 2, 3, 4, 3, 16 - 13 },
2317 { 1, 2, 3, 4, 11, 64 - 21 },
2318 { 1, 2, 3, 4, 11, 256 - 21 },
2319 { 1, 2, 3, 4, 11, 1024 - 21 },
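/* Scan-position band sizes per transform size: five small leading bands, then
 * one band covering the remaining scan positions (e.g. 1024 - 21 for 32x32). */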
2321 const int16_t *y_band_counts = band_counts[b->tx];
2322 const int16_t *uv_band_counts = band_counts[b->uvtx];
2323 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2324 int total_coeff = 0;
2326 #define MERGE(la, end, step, rd) \
2327 for (n = 0; n < end; n += step) \
2328 la[n] = !!rd(&la[n])
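/* MERGE collapses the per-4x4 nonzero-context entries to single flags before
 * they are consumed at a coarser transform granularity. */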
2329 #define MERGE_CTX(step, rd) \
2331 MERGE(l, end_y, step, rd); \
2332 MERGE(a, end_x, step, rd); \
2335 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2336 for (n = 0, y = 0; y < end_y; y += step) { \
2337 for (x = 0; x < end_x; x += step, n += step * step) { \
2338 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2339 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2340 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2341 c, e, p, a[x] + l[y], yscans[txtp], \
2342 ynbs[txtp], y_band_counts, qmul[0]); \
2343 a[x] = l[y] = !!res; \
2344 total_coeff |= !!res; \
2346 AV_WN16A(&s->eob[n], res); \
2353 #define SPLAT(la, end, step, cond) \
2355 for (n = 1; n < end; n += step) \
2356 la[n] = la[n - 1]; \
2357 } else if (step == 4) { \
2359 for (n = 0; n < end; n += step) \
2360 AV_WN32A(&la[n], la[n] * 0x01010101); \
2362 for (n = 0; n < end; n += step) \
2363 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2367 if (HAVE_FAST_64BIT) { \
2368 for (n = 0; n < end; n += step) \
2369 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2371 for (n = 0; n < end; n += step) { \
2372 uint32_t v32 = la[n] * 0x01010101; \
2373 AV_WN32A(&la[n], v32); \
2374 AV_WN32A(&la[n + 4], v32); \
2378 for (n = 0; n < end; n += step) \
2379 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2382 #define SPLAT_CTX(step) \
2384 SPLAT(a, end_x, step, end_x == w4); \
2385 SPLAT(l, end_y, step, end_y == h4); \
2410 #define DECODE_UV_COEF_LOOP(step, v) \
2411 for (n = 0, y = 0; y < end_y; y += step) { \
2412 for (x = 0; x < end_x; x += step, n += step * step) { \
2413 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2414 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2415 16 * step * step, c, e, p, a[x] + l[y], \
2416 uvscan, uvnb, uv_band_counts, qmul[1]); \
2417 a[x] = l[y] = !!res; \
2418 total_coeff |= !!res; \
2420 AV_WN16A(&s->uveob[pl][n], res); \
2422 s->uveob[pl][n] = res; \
2434 for (pl = 0; pl < 2; pl++) {
2473 uint8_t *dst_edge, ptrdiff_t stride_edge,
2474 uint8_t *dst_inner, ptrdiff_t stride_inner,
2475 uint8_t *l, int col, int x, int w,
2477 int p, int ss_h, int ss_v, int bytesperpixel)
2479 int have_top = row > 0 || y > 0;
2481 int have_right = x < w - 1;
2483 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2505 static const struct {
2514 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2517 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2518 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2520 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2521 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2530 mode = mode_conv[mode][have_left][have_top];
2531 if (edges[mode].needs_top) {
2533 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2534 int n_px_need_tr = 0;
2536 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2543 top = !(row & 7) && !y ?
2545 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2547 topleft = !(row & 7) && !y ?
2549 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2550 &dst_inner[-stride_inner];
2554 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2555 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2556 n_px_need + n_px_need_tr <= n_px_have) {
2560 if (n_px_need <= n_px_have) {
2561 memcpy(*a, top, n_px_need * bytesperpixel);
2563 #define memset_bpp(c, i1, v, i2, num) do { \
2564 if (bytesperpixel == 1) { \
2565 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2567 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2568 for (n = 0; n < (num); n++) { \
2569 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2573 memcpy(*a, top, n_px_have * bytesperpixel);
2574 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2577 #define memset_val(c, val, num) do { \
2578 if (bytesperpixel == 1) { \
2579 memset((c), (val), (num)); \
2582 for (n = 0; n < (num); n++) { \
2583 AV_WN16A(&(c)[n * 2], (val)); \
2587 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2589 if (edges[mode].needs_topleft) {
2590 if (have_left && have_top) {
2591 #define assign_bpp(c, i1, v, i2) do { \
2592 if (bytesperpixel == 1) { \
2593 (c)[(i1)] = (v)[(i2)]; \
2595 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2600 #define assign_val(c, i, v) do { \
2601 if (bytesperpixel == 1) { \
2604 AV_WN16A(&(c)[(i) * 2], (v)); \
2607 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2610 if (tx == TX_4X4 && edges[mode].needs_topright) {
2611 if (have_top && have_right &&
2612 n_px_need + n_px_need_tr <= n_px_have) {
2613 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2620 if (edges[mode].needs_left) {
2622 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2623 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2624 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2626 if (edges[mode].invert_left) {
2627 if (n_px_need <= n_px_have) {
2628 for (i = 0; i < n_px_need; i++)
2631 for (i = 0; i < n_px_have; i++)
2633 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2636 if (n_px_need <= n_px_have) {
2637 for (i = 0; i < n_px_need; i++)
2638 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2640 for (i = 0; i < n_px_have; i++)
2641 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2642 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2646 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
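/* The fill value above is the default left edge from the spec (129 at 8 bpp,
 * scaled up for higher bit depths), used when no left neighbour exists. */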
2654 ptrdiff_t uv_off, int bytesperpixel)
2658 int row = s->row, col = s->col;
2659 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2660 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2661 int end_x = FFMIN(2 * (s->cols - col), w4);
2662 int end_y = FFMIN(2 * (s->rows - row), h4);
2664 int uvstep1d = 1 << b->uvtx, p;
2669 for (n = 0, y = 0; y < end_y; y += step1d) {
2670 uint8_t *ptr = dst, *ptr_r = dst_r;
2671 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2672 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2682 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2686 s->block + 16 * n * bytesperpixel, eob);
2696 step = 1 << (b->uvtx * 2);
2697 for (p = 0; p < 2; p++) {
2698 dst = s->dst[1 + p];
2700 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2701 uint8_t *ptr = dst, *ptr_r = dst_r;
2702 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2703 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2710 ptr, s->uv_stride, l, col, x, w4, row, y,
2715 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2734 uint8_t *dst, ptrdiff_t dst_stride,
2737 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2738 int bw, int bh, int w, int h, int bytesperpixel)
2740 int mx = mv->x, my = mv->y, th;
2744 ref += y * ref_stride + x * bytesperpixel;
2750 th = (y + bh + 4 * !!my + 7) >> 6;
2752 if (x < !!mx * 3 || y < !!my * 3 ||
2753 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2755 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2757 bw + !!mx * 7, bh + !!my * 7,
2758 x - !!mx * 3, y - !!my * 3, w, h);
2762 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
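/* Luma subpel fractions are in 1/8-pel; the dsp filters expect 1/16-pel,
 * hence the << 1 above (chroma fractions arrive pre-scaled by the subsampling). */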
2767 ptrdiff_t dst_stride,
2768 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2769 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2771 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2772 int bw, int bh, int w, int h, int bytesperpixel)
2774 int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
2778 ref_u += y * src_stride_u + x * bytesperpixel;
2779 ref_v += y * src_stride_v + x * bytesperpixel;
2785 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2787 if (x < !!mx * 3 || y < !!my * 3 ||
2788 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2790 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2792 bw + !!mx * 7, bh + !!my * 7,
2793 x - !!mx * 3, y - !!my * 3, w, h);
2794 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2795 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2798 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2800 bw + !!mx * 7, bh + !!my * 7,
2801 x - !!mx * 3, y - !!my * 3, w, h);
2802 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2803 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2805 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2806 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2810 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2811 px, py, pw, ph, bw, bh, w, h, i) \
2812 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2813 mv, bw, bh, w, h, bytesperpixel)
2814 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2815 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2816 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2817 row, col, mv, bw, bh, w, h, bytesperpixel)
2819 #define FN(x) x##_8bpp
2820 #define BYTES_PER_PIXEL 1
2823 #undef BYTES_PER_PIXEL
2824 #define FN(x) x##_16bpp
2825 #define BYTES_PER_PIXEL 2
2828 #undef mc_chroma_dir
2830 #undef BYTES_PER_PIXEL
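/* FN() and BYTES_PER_PIXEL parameterize a shared template body so the
 * unscaled inter-prediction code is compiled once per bit depth. */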
2835 uint8_t *dst, ptrdiff_t dst_stride,
2838 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2839 int px, int py, int pw, int ph,
2840 int bw, int bh, int w, int h, int bytesperpixel,
2841 const uint16_t *scale, const uint8_t *step)
2846 y, x, in_mv, bw, bh, w, h, bytesperpixel);
2848 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
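/* scale[] is the Q14 fixed-point ref-to-frame ratio set up at lines 813/814
 * ((refw << 14) / w), so this is a widening multiply with truncation. */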
2850 int refbw_m1, refbh_m1;
2854 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
2855 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
2864 ref += y * ref_stride + x * bytesperpixel;
2867 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2868 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2872 th = (y + refbh_m1 + 4 + 7) >> 6;
2874 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2876 ref - 3 * ref_stride - 3 * bytesperpixel,
2878 refbw_m1 + 8, refbh_m1 + 8,
2879 x - 3, y - 3, w, h);
2883 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2890 ptrdiff_t dst_stride,
2891 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2892 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2894 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2895 int px, int py, int pw, int ph,
2896 int bw, int bh, int w, int h, int bytesperpixel,
2897 const uint16_t *scale, const uint8_t *step)
2902 ref_v, src_stride_v, ref_frame,
2903 y, x, in_mv, bw, bh, w, h, bytesperpixel);
2906 int refbw_m1, refbh_m1;
2912 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
2915 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
2920 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
2923 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
2929 ref_u += y * src_stride_u + x * bytesperpixel;
2930 ref_v += y * src_stride_v + x * bytesperpixel;
2933 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2934 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2938 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2940 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2942 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2944 refbw_m1 + 8, refbh_m1 + 8,
2945 x - 3, y - 3, w, h);
2947 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2950 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2952 refbw_m1 + 8, refbh_m1 + 8,
2953 x - 3, y - 3, w, h);
2955 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2957 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2958 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2963 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2964 px, py, pw, ph, bw, bh, w, h, i) \
2965 mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2966 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2967 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2968 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2969 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2970 mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2971 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2972 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2974 #define FN(x) x##_scaled_8bpp
2975 #define BYTES_PER_PIXEL 1
2978 #undef BYTES_PER_PIXEL
2979 #define FN(x) x##_scaled_16bpp
2980 #define BYTES_PER_PIXEL 2
2983 #undef mc_chroma_dir
2985 #undef BYTES_PER_PIXEL
2992 int row = s->row, col = s->col;
2995 if (bytesperpixel == 1) {
2996 inter_pred_scaled_8bpp(ctx);
2998 inter_pred_scaled_16bpp(ctx);
3001 if (bytesperpixel == 1) {
3002 inter_pred_8bpp(ctx);
3004 inter_pred_16bpp(ctx);
3010 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3011 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3012 int end_x = FFMIN(2 * (s->cols - col), w4);
3013 int end_y = FFMIN(2 * (s->rows - row), h4);
3015 int uvstep1d = 1 << b->uvtx, p;
3019 for (n = 0, y = 0; y < end_y; y += step1d) {
3021 for (x = 0; x < end_x; x += step1d,
3022 ptr += 4 * step1d * bytesperpixel, n += step) {
3027 s->block + 16 * n * bytesperpixel, eob);
3035 step = 1 << (b->uvtx * 2);
3036 for (p = 0; p < 2; p++) {
3037 dst = s->dst[p + 1];
3038 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3040 for (x = 0; x < end_x; x += uvstep1d,
3041 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3046 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3065 int row_and_7, int col_and_7,
3066 int w, int h, int col_end, int row_end,
3069 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3070 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
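/* Bitmasks (one bit per 8-pixel unit) selecting which columns/rows are
 * eligible for the widest loop filter, indexed by chroma subsampling. */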
3082 if (tx == TX_4X4 && (ss_v | ss_h)) {
3097 if (tx == TX_4X4 && !skip_inter) {
3098 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3100 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3102 for (y = row_and_7; y < h + row_and_7; y++) {
3103 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3105 mask[0][y][1] |= m_row_8;
3106 mask[0][y][2] |= m_row_4;
3117 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3118 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3120 mask[1][y][col_mask_id] |= m_col;
3123 mask[0][y][3] |= m_col;
3125 if (ss_h && (col_end & 1))
3126 mask[1][y][3] |= (t << (w - 1)) - t;
3128 mask[1][y][3] |= m_col;
3132 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3135 int mask_id = (tx == TX_8X8);
3136 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3137 int l2 = tx + ss_h - 1, step1d;
3138 int m_row = m_col & masks[l2];
3142 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3143 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3144 int m_row_8 = m_row - m_row_16;
3146 for (y = row_and_7; y < h + row_and_7; y++) {
3147 mask[0][y][0] |= m_row_16;
3148 mask[0][y][1] |= m_row_8;
3151 for (y = row_and_7; y < h + row_and_7; y++)
3152 mask[0][y][mask_id] |= m_row;
3157 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3158 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3159 mask[1][y][0] |= m_col;
3160 if (y - row_and_7 == h - 1)
3161 mask[1][y][1] |= m_col;
3163 for (y = row_and_7; y < h + row_and_7; y += step1d)
3164 mask[1][y][mask_id] |= m_col;
3166 } else if (tx != TX_4X4) {
3169 mask_id = (tx == TX_8X8) || (h == ss_v);
3170 mask[1][row_and_7][mask_id] |= m_col;
3171 mask_id = (tx == TX_8X8) || (w == ss_h);
3172 for (y = row_and_7; y < h + row_and_7; y++)
3173 mask[0][y][mask_id] |= t;
3175 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3177 for (y = row_and_7; y < h + row_and_7; y++) {
3181 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3187 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3202 s->min_mv.x = -(128 + col * 64);
3203 s->min_mv.y = -(128 + row * 64);
3211 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3212 (s->ss_v && h4 * 2 == (1 << b->tx)));
3217 if (bytesperpixel == 1) {
3230 #define SPLAT_ZERO_CTX(v, n) \
3232 case 1: v = 0; break; \
3233 case 2: AV_ZERO16(&v); break; \
3234 case 4: AV_ZERO32(&v); break; \
3235 case 8: AV_ZERO64(&v); break; \
3236 case 16: AV_ZERO128(&v); break; \
3238 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3240 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3241 if (s->ss_##dir2) { \
3242 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3243 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3245 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3246 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3266 s->block += w4 * h4 * 64 * bytesperpixel;
3269 s->eob += 4 * w4 * h4;
3280 emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
3281 (row + h4) > s->rows;
3282 emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
3283 (row + h4) > s->rows;
3288 s->dst[0] = f->data[0] + yoff;
3296 s->dst[1] = f->data[1] + uvoff;
3297 s->dst[2] = f->data[2] + uvoff;
3316 for (n = 0; o < w; n++) {
3322 s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
3331 for (n = s->ss_h; o < w; n++) {
3337 s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
3339 s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
3353 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3358 b->uvtx, skip_inter);
3365 limit >>= (sharp + 3) >> 2;
3366 limit = FFMIN(limit, 9 - sharp);
3368 limit = FFMAX(limit, 1);
3377 s->block += w4 * h4 * 64 * bytesperpixel;
3380 s->eob += 4 * w4 * h4;
3387 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3395 ptrdiff_t hbs = 4 >> bl;
3402 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3403 } else if (col + hbs < s->cols) {
3404 if (row + hbs < s->rows) {
3408 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3411 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3412 yoff += hbs * 8 * y_stride;
3413 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3414 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3417 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3418 yoff += hbs * 8 * bytesperpixel;
3419 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3420 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3423 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3425 yoff + 8 * hbs * bytesperpixel,
3426 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3427 yoff += hbs * 8 * y_stride;
3428 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3429 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3430 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3431 yoff + 8 * hbs * bytesperpixel,
3432 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3439 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3441 yoff + 8 * hbs * bytesperpixel,
3442 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3445 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3447 } else if (row + hbs < s->rows) {
3450 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3451 yoff += hbs * 8 * y_stride;
3452 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3453 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3456 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3460 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3466 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3470 ptrdiff_t hbs = 4 >> bl;
3477 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3478 } else if (s->b->bl == bl) {
3479 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3481 yoff += hbs * 8 * y_stride;
3482 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3483 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3485 yoff += hbs * 8 * bytesperpixel;
3486 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3487 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3491 if (col + hbs < s->cols) {
3492 if (row + hbs < s->rows) {
3493 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3494 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3495 yoff += hbs * 8 * y_stride;
3496 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3497 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3499 yoff + 8 * hbs * bytesperpixel,
3500 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3502 yoff += hbs * 8 * bytesperpixel;
3503 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3504 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3506 } else if (row + hbs < s->rows) {
3507 yoff += hbs * 8 * y_stride;
3508 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3509 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3521 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3522 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3523 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3524 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3525 unsigned hm = hm1 | hm2 | hm13 | hm23;
3527 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3530 int L = *l, H = L >> 4;
3533 if (hmask1[0] & x) {
3534 if (hmask2[0] & x) {
3540 } else if (hm2 & x) {
3547 [0](ptr, ls, E, I, H);
3550 [0](ptr, ls, E, I, H);
3552 } else if (hm2 & x) {
3553 int L = l[8 << ss_v], H = L >> 4;
3557 [0](ptr + 8 * ls, ls, E, I, H);
3565 int L = *l, H = L >> 4;
3577 } else if (hm23 & x) {
3578 int L = l[8 << ss_v], H = L >> 4;
3598 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3600 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3602 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3605 int L = *l, H = L >> 4;
3609 if (vmask[0] & (x << (1 + ss_h))) {
3615 } else if (vm & (x << (1 + ss_h))) {
3621 [!!(vmask[1] & (x << (1 + ss_h)))]
3622 [1](ptr, ls, E, I, H);
3625 [1](ptr, ls, E, I, H);
3627 } else if (vm & (x << (1 + ss_h))) {
3628 int L = l[1 + ss_h], H = L >> 4;
3632 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
3637 int L = *l, H = L >> 4;
3640 if (vm3 & (x << (1 + ss_h))) {
3649 } else if (vm3 & (x << (1 + ss_h))) {
3650 int L = l[1 + ss_h], H = L >> 4;
3667 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3685 for (p = 0; p < 2; p++) {
3686 dst = f->data[1 + p] + uvoff;
3694 int sb_start = ( idx * n) >> log2_n;
3695 int sb_end = ((idx + 1) * n) >> log2_n;
3696 *start = FFMIN(sb_start, n) << 3;
3697 *end = FFMIN(sb_end, n) << 3;
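/* Splits n superblocks evenly across 2^log2_n tiles; results are returned in
 * units of 8x8 blocks, hence the << 3. */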
3701 int max_count, int update_factor)
3703 unsigned ct = ct0 + ct1, p2, p1;
3708 update_factor = FASTDIV(update_factor * FFMIN(ct, max_count), max_count);
3710 p2 = ((((int64_t) ct0) << 8) + (ct >> 1)) / ct;
3711 p2 = av_clip(p2, 1, 255);
3714 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
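/* Blend the old probability p1 toward the observed probability p2 with
 * rounding; update_factor was pre-scaled by the sample count above. */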
3724 for (i = 0; i < 4; i++)
3725 for (j = 0; j < 2; j++)
3726 for (k = 0; k < 2; k++)
3727 for (l = 0; l < 6; l++)
3728 for (m = 0; m < 6; m++) {
3730 unsigned *e = s->counts.eob[i][j][k][l][m];
3733 if (l == 0 && m >= 3)
3737 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3750 for (i = 0; i < 3; i++)
3754 for (i = 0; i < 4; i++)
3759 for (i = 0; i < 5; i++)
3765 for (i = 0; i < 5; i++)
3771 for (i = 0; i < 5; i++) {
3775 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3776 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3781 for (i = 0; i < 4; i++)
3782 for (j = 0; j < 4; j++) {
3786 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3787 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3793 for (i = 0; i < 2; i++) {
3799 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3807 for (i = 0; i < 4; i++) {
3811 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3817 for (i = 0; i < 7; i++) {
3821 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3822 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3831 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3832 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3837 for (i = 0; i < 2; i++) {
3839 unsigned *c, (*c2)[2], sum;
3846 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3851 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3854 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3858 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3866 for (j = 0; j < 10; j++)
3867 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3869 for (j = 0; j < 2; j++) {
3872 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3873 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3878 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3879 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3891 for (i = 0; i < 4; i++) {
3895 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3915 for (i = 0; i < 10; i++) {
3919 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3951 for (i = 0; i < 3; i++) {
3956 for (i = 0; i < 8; i++) {
3978 int res, tile_row, tile_col, i, ref, row, col;
3981 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3987 } else if (res == 0) {
4001 for (i = 0; i < 8; i++) {
4043 for (i = 0; i < 8; i++) {
4085 "Failed to allocate block buffers\n");
4091 for (i = 0; i < 4; i++) {
4092 for (j = 0; j < 2; j++)
4093 for (k = 0; k < 2; k++)
4094 for (l = 0; l < 6; l++)
4095 for (m = 0; m < 6; m++)