#define VP9_SYNCCODE 0x498342

#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2

#define MAX_SEGMENT 8
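    /* Per-frame coefficient statistics gathered for backward probability
     * adaptation; the indices appear to be [tx size][plane type][intra/inter]
     * [coef band][coef context][token bin]. */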
    unsigned coef[4][2][2][6][6][3];
    unsigned eob[4][2][2][6][6][2];

        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },

    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
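/* Carve the per-frame context arrays out of a single backing buffer:
 * each assign() claims sb_cols * n elements of the given type from the
 * running pointer p. */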
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);

                                    16 * 16 + 2 * chroma_eobs) * sbs);

                                   16 * 16 + 2 * chroma_eobs);
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
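    /* Inverse map for delta-coded probability updates: the first 20 entries
     * spread coarse steps across the range (7, 20, ..., 254), and the
     * remainder fills in the skipped values in ascending order. */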
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
    s->bpp = 8 + bits * 2;

    res = pix_fmt_rgb[bits];

    static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {

    res = pix_fmt_for_ss[bits][1][1];

    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    for (i = 0; i < 3; i++) {

               "Ref pixfmt (%s) did not match current frame (%s)",

        } else if (refw == w && refh == h) {

            if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                       "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",

            s->mvscale[i][0] = (refw << 14) / w;
            s->mvscale[i][1] = (refh << 14) / h;

    for (i = 0; i < 4; i++)
    for (i = 0; i < 2; i++)

    for (i = 0; i < 7; i++)
    for (i = 0; i < 3; i++)

               "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",

    for (i = 0; i < 8; i++) {
    int qyac, qydc, quvac, quvdc, lflvl, sh;

    qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
    qyac = av_clip_uintp2(qyac, 8);

                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {

        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);

    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);

    if (size2 > size - (data2 - data)) {
    for (i = 0; i < 2; i++)

    for (i = 0; i < 2; i++)
        for (j = 0; j < 2; j++)

    for (i = 0; i < 2; i++)
        for (j = 0; j < 3; j++)

    for (i = 0; i < 4; i++) {
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        if (m >= 3 && l == 0)
                        for (n = 0; n < 3; n++) {

        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {

    for (i = 0; i < 3; i++)

    for (i = 0; i < 7; i++)
        for (j = 0; j < 3; j++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)

    for (i = 0; i < 4; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++) {

    for (i = 0; i < 5; i++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 9; j++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++)
            for (k = 0; k < 3; k++)

    for (i = 0; i < 3; i++)

    for (i = 0; i < 2; i++) {
        for (j = 0; j < 10; j++)

        for (j = 0; j < 10; j++)

    for (i = 0; i < 2; i++) {
        for (j = 0; j < 2; j++)
            for (k = 0; k < 3; k++)

        for (j = 0; j < 3; j++)

    for (i = 0; i < 2; i++) {

    return (data2 - data) + size2;
                         VP56mv *pmv, int ref, int z, int idx, int sb)
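    /* Candidate neighbour positions scanned for reference MVs, stored as
     * (column, row) offsets in 8x8-block units relative to the current
     * block; eight candidates per block size. */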
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
        [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
        [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
        [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
        [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
        [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
        [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    };
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
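    /* Sentinel packing two "impossible" int16 halves into one word; no
     * clamped motion vector produces this bit pattern, so it can mark "no
     * candidate seen yet" in the AV_RN32A() comparisons below. */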
#define INVALID_MV 0x80008000U

#define RETURN_DIRECT_MV(mv) \
        uint32_t m = AV_RN32A(&mv); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \

    if (sb == 2 || sb == 1) {
    } else if (sb == 3) {

#define RETURN_MV(mv) \
            av_assert2(idx == 1); \
            av_assert2(mem != INVALID_MV); \
            if (mem_sub8x8 == INVALID_MV) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
                mem_sub8x8 = AV_RN32A(&mv); \
            } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
        uint32_t m = AV_RN32A(&mv); \
            clamp_mv(pmv, &mv, s); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \
            clamp_mv(pmv, &mv, s); \
            if (mv->ref[0] == ref) {
            } else if (mv->ref[1] == ref) {

            if (mv->ref[0] == ref) {
            } else if (mv->ref[1] == ref) {

        for (; i < 8; i++) {
            int c = p[i][0] + col, r = p[i][1] + row;

                if (mv->ref[0] == ref) {
                } else if (mv->ref[1] == ref) {

                if (mv->ref[0] == ref) {
                } else if (mv->ref[1] == ref) {
#define RETURN_SCALE_MV(mv, scale) \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \

        for (i = 0; i < 8; i++) {
            int c = p[i][0] + col, r = p[i][1] + row;

                if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

                if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

#undef RETURN_SCALE_MV
    for (n = 0, m = 0; m < c; m++) {
            n = (n << 3) | (bit << 1);
    return sign ? -(n + 1) : (n + 1);

                     mode == NEWMV ? -1 : sb);
        if ((mode == NEWMV || sb == -1) &&
        if (mode == NEWMV) {
                     mode == NEWMV ? -1 : sb);
        if ((mode == NEWMV || sb == -1) &&
        if (mode == NEWMV) {

        int v16 = v * 0x0101;
        uint32_t v32 = v * 0x01010101;
        uint64_t v64 = v * 0x0101010101010101ULL;
        uint32_t v32 = v * 0x01010101;
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf

    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int vref, filter_id;

        for (y = 0; y < h4; y++) {
            int idx_base = (y + row) * 8 * s->sb_cols + col;
            for (x = 0; x < w4; x++)
                pred = FFMIN(pred, refsegmap[idx_base + x]);
    if (have_a && have_l) {
    } else if (have_l) {

            l[0] = a[1] = b->mode[1];
            l[0] = a[1] = b->mode[1] = b->mode[0];

            l[1] = a[1] = b->mode[3];
            l[1] = a[1] = b->mode[3] = b->mode[2];
            l[1] = a[1] = b->mode[3] = b->mode[1];

    } else if (b->intra) {

        static const uint8_t size_group[10] = {
            3, 3, 3, 3, 2, 2, 2, 1, 1, 1
        };
        int sz = size_group[b->bs];
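        /* Context for coding this block's inter mode, derived from the
         * (above, left) neighbour modes; indices 0-9 appear to be the
         * intra modes and 10-13 the inter modes. */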
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
    } else if (have_l) {

        if (refl == refa && refa == s->varcompref[1]) {
            c = (refa == refl) ? 3 : 1;
                c = (refl == refa) ? 4 : 2;

    } else if (have_l) {
    } else if (have_l) {

            b->ref[0] = 1 + bit;

        static const uint8_t off[10] = {
            3, 0, 0, 1, 0, 0, 0, 0, 0, 0
        };
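/* Splat one decoded per-block value across n consecutive context
 * entries; power-of-two sizes use single aligned stores of a
 * byte-replicated pattern (val * 0x0101...). */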
#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val * 0x0101);                 break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);             break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \

#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                        break; \
    case 2:  AV_WN16A(&var, val * 0x0101);     break; \
    case 4:  AV_WN32A(&var, val * 0x01010101); break; \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
#define SET_CTXS(dir, off, n) \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \

    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;

    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
        for (n = 0; n < w4 * 2; n++) {
        for (n = 0; n < h4 * 2; n++) {

        for (y = 0; y < h4; y++) {
            int x, o = (row + y) * s->sb_cols * 8 + col;

                for (x = 0; x < w4; x++) {
            } else if (b->comp) {
                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                    mv[x].ref[1] = b->ref[1];
                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, const int16_t *qmul)
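    /* The nonzero context for the next coefficient is the rounded average
     * of the token-cache values of its two already-decoded neighbours in
     * scan order. */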
    int i = 0, band = 0, band_left = band_counts[band];

            cnt[band][nnz][0]++;
                band_left = band_counts[++band];
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            if (++i == n_coeffs)

        cnt[band][nnz][1]++;

            cnt[band][nnz][2]++;
                cache[rc] = val = 2;

        if (!is8bitsperpixel) {

#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        AV_WN32A(&c[i * 2], v); \

            band_left = band_counts[++band];
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
    } while (++i < n_coeffs);
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)
                                   nnz, scan, nb, band_counts, qmul);

                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)
                                   nnz, scan, nb, band_counts, qmul);

                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)
                                   nnz, scan, nb, band_counts, qmul);

                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)
                                   nnz, scan, nb, band_counts, qmul);
    int row = s->row, col = s->col;

    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;
    const int16_t *const *yscans = vp9_scans[tx];
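    /* Number of coefficients in each of the six coefficient bands per
     * transform size; the last entry is written as the total count minus
     * the coefficients covered by the earlier bands. */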
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts  = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
    int bytesperpixel = is8bitsperpixel ? 1 : 2;
    int total_coeff = 0;
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
                   c, e, p, a[x] + l[y], yscans[txtp], \
                   ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
                AV_WN16A(&s->eob[n], res); \

#define SPLAT(la, end, step, cond) \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n],     v32); \
                    AV_WN32A(&la[n + 4], v32); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \

#define SPLAT_CTX(step) \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
                   16 * step * step, c, e, p, a[x] + l[y], \
                   uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
                AV_WN16A(&s->uveob[pl][n], res); \
                s->uveob[pl][n] = res; \

    for (pl = 0; pl < 2; pl++) {
                                         uint8_t *dst_edge, ptrdiff_t stride_edge,
                                         uint8_t *dst_inner, ptrdiff_t stride_inner,
                                         uint8_t *l, int col, int x, int w,
                                         int p, int ss_h, int ss_v, int bytesperpixel)
    int have_top = row > 0 || y > 0;
    int have_right = x < w - 1;
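    /* When the top or left neighbour is unavailable, remap the intra
     * prediction mode to a variant that only needs the edges that
     * actually exist. */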
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {

    static const struct {
        [DC_PRED]         = { .needs_top = 1, .needs_left = 1 },
        [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_DOWN_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_UP_PRED]     = { .needs_left = 1, .invert_left = 1 },
        [TM_VP8_PRED]     = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)

        top = !(row & 7) && !y ?
              y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
        topleft = !(row & 7) && !y ?
                  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                  &dst_inner[-stride_inner];

            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {

            if (n_px_need <= n_px_have) {
                memcpy(*a, top, n_px_need * bytesperpixel);
#define memset_bpp(c, i1, v, i2, num) do { \
    if (bytesperpixel == 1) { \
        memset(&(c)[(i1)], (v)[(i2)], (num)); \
    } else { \
        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[((i1) + n) * 2], val); \

                memcpy(*a, top, n_px_have * bytesperpixel);
                memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);

#define memset_val(c, val, num) do { \
    if (bytesperpixel == 1) { \
        memset((c), (val), (num)); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[n * 2], (val)); \

            memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);

        if (edges[mode].needs_topleft) {
            if (have_left && have_top) {
#define assign_bpp(c, i1, v, i2) do { \
    if (bytesperpixel == 1) { \
        (c)[(i1)] = (v)[(i2)]; \
        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \

#define assign_val(c, i, v) do { \
    if (bytesperpixel == 1) { \
        AV_WN16A(&(c)[(i) * 2], (v)); \

                assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));

            if (tx == TX_4X4 && edges[mode].needs_topright) {
                if (have_top && have_right &&
                    n_px_need + n_px_need_tr <= n_px_have) {
                    memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
    if (edges[mode].needs_left) {
            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (edges[mode].invert_left) {
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                    for (i = 0; i < n_px_have; i++)
                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);

            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
                                      ptrdiff_t uv_off, int bytesperpixel)
    int row = s->row, col = s->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int uvstep1d = 1 << b->uvtx, p;
    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
                               ptr_r += 4 * step1d * bytesperpixel, n += step) {
                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
                           s->block + 16 * n * bytesperpixel, eob);

    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = s->dst[1 + p];
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                                   ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
                                        ptr, s->uv_stride, l, col, x, w4, row, y,
                               s->uvblock[p] + 16 * n * bytesperpixel, eob);
                                            uint8_t *dst, ptrdiff_t dst_stride,
                                            const uint8_t *ref, ptrdiff_t ref_stride,
                                            ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                            int px, int py, int pw, int ph,
                                            int bw, int bh, int w, int h, int bytesperpixel,
                                            const uint16_t *scale, const uint8_t *step)
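/* scale[] holds (ref_dim << 14) / cur_dim (see the mvscale setup above),
 * so scale_mv() converts a position from current-frame to reference-frame
 * coordinates in 14-bit fixed point. */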
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
    int refbw_m1, refbh_m1;

    mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
    mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

    ref += y * ref_stride + x * bytesperpixel;

    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

    th = (y + refbh_m1 + 4 + 7) >> 6;
    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
                                 ref - 3 * ref_stride - 3 * bytesperpixel,
                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);
    smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
                                              ptrdiff_t dst_stride,
                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                              int px, int py, int pw, int ph,
                                              int bw, int bh, int w, int h, int bytesperpixel,
                                              const uint16_t *scale, const uint8_t *step)
    int refbw_m1, refbh_m1;

        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;

    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

    th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
                                 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);
        smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

                                 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);
        smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
        smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
        smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
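/* The inter-prediction block loop lives in a shared template
 * (vp9_mc_template.c in FFmpeg) that is compiled once per bit depth:
 * FN() renames the generated functions and BYTES_PER_PIXEL fixes the
 * sample size before each inclusion. */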
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#undef mc_chroma_dir
#undef BYTES_PER_PIXEL
                                              uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *ref, ptrdiff_t ref_stride,
                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                              int bw, int bh, int w, int h, int bytesperpixel)
    int mx = mv->x, my = mv->y, th;

    ref += y * ref_stride + x * bytesperpixel;

    th = (y + bh + 4 * !!my + 7) >> 6;
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
                                                ptrdiff_t dst_stride,
                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                                int bw, int bh, int w, int h, int bytesperpixel)
    int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;

    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;

    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);

                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h, bytesperpixel)

#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#undef mc_luma_dir
#undef mc_chroma_dir
#undef BYTES_PER_PIXEL
    int row = s->row, col = s->col;

    if (bytesperpixel == 1) {
        inter_pred_scaled_8bpp(ctx);
    } else {
        inter_pred_scaled_16bpp(ctx);
    }
    if (bytesperpixel == 1) {
        inter_pred_8bpp(ctx);
    } else {
        inter_pred_16bpp(ctx);
    }
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int uvstep1d = 1 << b->uvtx, p;

        for (n = 0, y = 0; y < end_y; y += step1d) {
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                               s->block + 16 * n * bytesperpixel, eob);

        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                                   s->uvblock[p] + 16 * n * bytesperpixel, eob);
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
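    /* Build the loopfilter bitmasks for one superblock: within mask[],
     * indices 0 and 1 select the two filtering directions, each further
     * split by filter width. */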
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    if (tx == TX_4X4 && (ss_v | ss_h)) {

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;

            mask[0][y][3] |= m_col;
            if (ss_h && (col_end & 1))
                mask[1][y][3] |= (t << (w - 1)) - t;
            else
                mask[1][y][3] |= m_col;

        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        int mask_id = (tx == TX_8X8);
        static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
        int l2 = tx + ss_h - 1, step1d;
        int m_row = m_col & masks[l2];

        if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
            int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
            int m_row_8 = m_row - m_row_16;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][0] |= m_row_16;
                mask[0][y][1] |= m_row_8;
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= m_row;

        if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
            for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                mask[1][y][0] |= m_col;
            if (y - row_and_7 == h - 1)
                mask[1][y][1] |= m_col;
            for (y = row_and_7; y < h + row_and_7; y += step1d)
                mask[1][y][mask_id] |= m_col;
    } else if (tx != TX_4X4) {
        mask_id = (tx == TX_8X8) || (h == ss_v);
        mask[1][row_and_7][mask_id] |= m_col;
        mask_id = (tx == TX_8X8) || (w == ss_h);
        for (y = row_and_7; y < h + row_and_7; y++)
            mask[0][y][mask_id] |= t;
        int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

        for (y = row_and_7; y < h + row_and_7; y++) {
        mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,

    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);

    b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                       (s->ss_v && h4 * 2 == (1 << b->tx)));

    if (bytesperpixel == 1) {
#define SPLAT_ZERO_CTX(v, n) \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
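/* In the two-pass (frame-threading) path the residual buffers are laid
 * out linearly, so after each block the block/eob pointers advance by
 * the block's full coefficient footprint. */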
    s->block += w4 * h4 * 64 * bytesperpixel;
    s->eob   += 4 * w4 * h4;

        emu[0] = (col + w4) * 8 > f->linesize[0] ||
                 (row + h4) > s->rows;
        emu[1] = (col + w4) * 4 > f->linesize[1] ||
                 (row + h4) > s->rows;

            s->dst[0] = f->data[0] + yoff;

            s->dst[1] = f->data[1] + uvoff;
            s->dst[2] = f->data[2] + uvoff;

            for (n = 0; o < w; n++) {
                                         s->tmp_y + o, 128, h, 0, 0);
                o += bw * bytesperpixel;

            for (n = s->ss_h; o < w; n++) {
                                         s->tmp_uv[0] + o, 128, h, 0, 0);
                                         s->tmp_uv[1] + o, 128, h, 0, 0);
                o += bw * bytesperpixel;

        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
                       b->uvtx, skip_inter);

            limit >>= (sharp + 3) >> 2;
            limit = FFMIN(limit, 9 - sharp);
        limit = FFMAX(limit, 1);

        s->block += w4 * h4 * 64 * bytesperpixel;
        s->eob   += 4 * w4 * h4;
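/* Recursive partition walk: reads the partition syntax and splits the
 * current block; hbs is half the block size, in 8x8-block units, at the
 * current level. */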
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
    ptrdiff_t hbs = 4 >> bl;

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row + hbs, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (row + hbs < s->rows) {
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
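/* Second-pass variant: replays the block structure recorded during the
 * first pass (b->bl / b->bp) instead of re-reading partition syntax from
 * the bitstream. */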
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
    ptrdiff_t hbs = 4 >> bl;

        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);

        if (col + hbs < s->cols) {
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
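/* Apply the loopfilter over one superblock row: for every edge bit set
 * in the masks, L is the per-block filter level, H its upper nibble, and
 * E/I the level-derived edge and interior limits passed to the dsp
 * filter functions. */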
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
                int L = *l, H = L >> 4;

                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                    } else if (hm2 & x) {
                           [0](ptr, ls, E, I, H);
                           [0](ptr, ls, E, I, H);
            } else if (hm2 & x) {
                int L = l[8 << ss_v], H = L >> 4;
                       [0](ptr + 8 * ls, ls, E, I, H);
                int L = *l, H = L >> 4;
            } else if (hm23 & x) {
                int L = l[8 << ss_v], H = L >> 4;

    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
                int L = *l, H = L >> 4;

                    if (vmask[0] & (x << (1 + ss_h))) {
                } else if (vm & (x << (1 + ss_h))) {
                           [!!(vmask[1] & (x << (1 + ss_h)))]
                           [1](ptr, ls, E, I, H);
                           [1](ptr, ls, E, I, H);
            } else if (vm & (x << (1 + ss_h))) {
                int L = l[1 + ss_h], H = L >> 4;
                       [1](ptr + 8 * bytesperpixel, ls, E, I, H);
                int L = *l, H = L >> 4;
                if (vm3 & (x << (1 + ss_h))) {
            } else if (vm3 & (x << (1 + ss_h))) {
                int L = l[1 + ss_h], H = L >> 4;
                                            int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)

    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;

    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
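/* Blend a forward probability toward the frame's observed symbol counts:
 * p2 is the maximum-likelihood estimate from the counts, and the blend
 * weight scales with the count, saturating at max_count. */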
                                          int max_count, int update_factor)
    unsigned ct = ct0 + ct1, p2, p1;

    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        if (l == 0 && m >= 3)
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
    for (i = 0; i < 3; i++)
    for (i = 0; i < 4; i++)
    for (i = 0; i < 5; i++)
    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++) {
        adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
        adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 2; i++) {
        adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);

    for (i = 0; i < 4; i++) {
        adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);

    for (i = 0; i < 7; i++) {
        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 2; i++) {
        unsigned *c, (*c2)[2], sum;

        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];

        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);

        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 4; i++) {
        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

    for (i = 0; i < 10; i++) {
        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

    for (i = 0; i < 3; i++) {
    for (i = 0; i < 8; i++) {
    int res, tile_row, tile_col, i, ref, row, col;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    } else if (res == 0) {

    for (i = 0; i < 8; i++) {
    for (i = 0; i < 8; i++) {

               "Failed to allocate block buffers\n");

    for (i = 0; i < 4; i++) {
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++)

    if (tile_size > size) {

         row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
        ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
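            /* Each tile column keeps its own range-coder state: restore it
             * before decoding this tile's share of the superblock row and
             * save it back afterwards, so tiles stay independent. */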
            memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

                 col < s->tiling.tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {

                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

            memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
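            /* Back up the pre-loopfilter bottom row of this superblock row:
             * the next row needs the unfiltered pixels for intra
             * prediction. */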
            if (row + 8 < s->rows) {
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);

            lflvl_ptr = s->lflvl;
            for (col = 0; col < s->cols;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {

    } while (s->pass++ == 1);
    for (i = 0; i < 8; i++) {

    for (i = 0; i < 3; i++)
    for (i = 0; i < 8; i++)

    for (i = 0; i < 3; i++) {
    for (i = 0; i < 8; i++) {

        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {

    for (i = 0; i < 3; i++) {
        if (ssrc->frames[i].tf.f->data[0]) {
    for (i = 0; i < 8; i++) {
        if (ssrc->next_refs[i].f->data[0]) {

    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;

    if (ssrc->segmentation.enabled) {