35 #define VP9_SYNCCODE 0x498342
148 #define MAX_SEGMENT 8
207 unsigned coef[4][2][2][6][6][3];
208 unsigned eob[4][2][2][6][6][2];
256 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
257 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
259 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
260 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
327 s->
cols = (w + 7) >> 3;
328 s->
rows = (h + 7) >> 3;
330 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
407 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
413 static const int inv_map_table[254] = {
414 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
415 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
416 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
417 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
418 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
419 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
420 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
421 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
422 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
423 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
424 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
425 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
426 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
427 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
428 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
429 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
430 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
431 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
472 int c, i, j, k, l,
m,
n, w, h, max, size2, res, sharp;
601 for (i = 0; i < 4; i++)
604 for (i = 0; i < 2; i++)
623 for (i = 0; i < 7; i++)
627 for (i = 0; i < 3; i++)
636 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
643 for (i = 0; i < 8; i++) {
662 int qyac, qydc, quvac, quvdc, lflvl, sh;
672 qydc = av_clip_uintp2(qyac + s->
ydc_qdelta, 8);
675 qyac = av_clip_uintp2(qyac, 8);
693 av_clip_uintp2(lflvl + (s->
lf_delta.
ref[0] << sh), 6);
694 for (j = 1; j < 4; j++) {
712 for (max = 0; (s->
sb_cols >> max) >= 4; max++) ;
713 max =
FFMAX(0, max - 1);
748 if (size2 > size - (data2 - data)) {
777 for (i = 0; i < 2; i++)
780 for (i = 0; i < 2; i++)
781 for (j = 0; j < 2; j++)
785 for (i = 0; i < 2; i++)
786 for (j = 0; j < 3; j++)
794 for (i = 0; i < 4; i++) {
797 for (j = 0; j < 2; j++)
798 for (k = 0; k < 2; k++)
799 for (l = 0; l < 6; l++)
800 for (m = 0; m < 6; m++) {
803 if (m >= 3 && l == 0)
805 for (n = 0; n < 3; n++) {
815 for (j = 0; j < 2; j++)
816 for (k = 0; k < 2; k++)
817 for (l = 0; l < 6; l++)
818 for (m = 0; m < 6; m++) {
832 for (i = 0; i < 3; i++)
836 for (i = 0; i < 7; i++)
837 for (j = 0; j < 3; j++)
843 for (i = 0; i < 4; i++)
844 for (j = 0; j < 2; j++)
849 for (i = 0; i < 4; i++)
858 for (i = 0; i < 5; i++)
867 for (i = 0; i < 5; i++) {
878 for (i = 0; i < 5; i++)
884 for (i = 0; i < 4; i++)
885 for (j = 0; j < 9; j++)
890 for (i = 0; i < 4; i++)
891 for (j = 0; j < 4; j++)
892 for (k = 0; k < 3; k++)
898 for (i = 0; i < 3; i++)
902 for (i = 0; i < 2; i++) {
906 for (j = 0; j < 10; j++)
914 for (j = 0; j < 10; j++)
920 for (i = 0; i < 2; i++) {
921 for (j = 0; j < 2; j++)
922 for (k = 0; k < 3; k++)
927 for (j = 0; j < 3; j++)
934 for (i = 0; i < 2; i++) {
946 return (data2 - data) + size2;
957 VP56mv *pmv,
int ref,
int z,
int idx,
int sb)
959 static const int8_t mv_ref_blk_off[
N_BS_SIZES][8][2] = {
960 [
BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
961 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
962 [
BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
963 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
964 [
BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
965 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
966 [
BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
967 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
968 [
BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
969 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
970 [
BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
971 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
972 [
BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
973 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
974 [
BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
975 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
976 [
BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
977 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
978 [
BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
979 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
980 [
BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
981 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
982 [
BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
983 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
984 [
BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
985 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
988 int row = s->
row, col = s->
col, row7 = s->
row7;
989 const int8_t (*p)[2] = mv_ref_blk_off[b->
bs];
990 #define INVALID_MV 0x80008000U
994 #define RETURN_DIRECT_MV(mv) \
996 uint32_t m = AV_RN32A(&mv); \
1000 } else if (mem == INVALID_MV) { \
1002 } else if (m != mem) { \
1009 if (sb == 2 || sb == 1) {
1011 }
else if (sb == 3) {
1017 #define RETURN_MV(mv) \
1022 clamp_mv(&tmp, &mv, s); \
1023 m = AV_RN32A(&tmp); \
1027 } else if (mem == INVALID_MV) { \
1029 } else if (m != mem) { \
1034 uint32_t m = AV_RN32A(&mv); \
1036 clamp_mv(pmv, &mv, s); \
1038 } else if (mem == INVALID_MV) { \
1040 } else if (m != mem) { \
1041 clamp_mv(pmv, &mv, s); \
1049 if (mv->
ref[0] == ref) {
1051 }
else if (mv->
ref[1] == ref) {
1057 if (mv->
ref[0] == ref) {
1059 }
else if (mv->
ref[1] == ref) {
1069 for (; i < 8; i++) {
1070 int c = p[i][0] + col,
r = p[i][1] + row;
1075 if (mv->
ref[0] == ref) {
1077 }
else if (mv->
ref[1] == ref) {
1089 if (mv->
ref[0] == ref) {
1091 }
else if (mv->
ref[1] == ref) {
1096 #define RETURN_SCALE_MV(mv, scale) \
1099 VP56mv mv_temp = { -mv.x, -mv.y }; \
1100 RETURN_MV(mv_temp); \
1107 for (i = 0; i < 8; i++) {
1108 int c = p[i][0] + col,
r = p[i][1] + row;
1113 if (mv->
ref[0] != ref && mv->
ref[0] >= 0) {
1116 if (mv->
ref[1] != ref && mv->
ref[1] >= 0 &&
1130 if (mv->
ref[0] != ref && mv->
ref[0] >= 0) {
1133 if (mv->
ref[1] != ref && mv->
ref[1] >= 0 &&
1144 #undef RETURN_SCALE_MV
1158 for (n = 0, m = 0; m <
c; m++) {
1184 n = (n << 3) | (bit << 1);
1197 return sign ? -(n + 1) : (n + 1);
1212 mode ==
NEWMV ? -1 : sb);
1214 if ((mode ==
NEWMV || sb == -1) &&
1229 if (mode ==
NEWMV) {
1243 mode ==
NEWMV ? -1 : sb);
1244 if ((mode ==
NEWMV || sb == -1) &&
1259 if (mode ==
NEWMV) {
1284 int v16 = v * 0x0101;
1292 uint32_t v32 = v * 0x01010101;
1301 uint64_t v64 = v * 0x0101010101010101ULL;
1307 uint32_t v32 = v * 0x01010101;
1322 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1325 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1333 int row = s->
row, col = s->
col, row7 = s->
row7;
1334 enum TxfmMode max_tx = max_tx_for_bl_bp[b->
bs];
1338 int vref, filter_id;
1355 for (
y = 0;
y < h4;
y++) {
1356 int idx_base = (
y + row) * 8 * s->
sb_cols + col;
1357 for (x = 0; x < w4; x++)
1358 pred =
FFMIN(pred, refsegmap[idx_base + x]);
1364 &refsegmap[idx_base], w4);
1403 if (have_a && have_l) {
1427 }
else if (have_l) {
1475 l[0] = a[1] = b->
mode[1];
1477 l[0] = a[1] = b->
mode[1] = b->
mode[0];
1485 l[1] = a[1] = b->
mode[3];
1487 l[1] = a[1] = b->
mode[3] = b->
mode[2];
1491 l[1] = a[1] = b->
mode[3] = b->
mode[1];
1503 }
else if (b->
intra) {
1532 static const uint8_t size_group[10] = {
1533 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1535 int sz = size_group[b->
bs];
1546 static const uint8_t inter_mode_ctx_lut[14][14] = {
1547 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1548 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1549 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1550 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1551 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1552 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1553 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1554 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1555 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1556 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1557 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1558 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1559 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1560 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1595 }
else if (have_l) {
1626 if (refl == refa && refa == s->
varcompref[1]) {
1633 c = (refa == refl) ? 3 : 1;
1650 c = (refl == refa) ? 4 : 2;
1662 }
else if (have_l) {
1788 }
else if (have_l) {
1802 b->
ref[0] = 1 + bit;
1811 static const uint8_t off[10] = {
1812 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1908 #define SPLAT_CTX(var, val, n) \
1910 case 1: var = val; break; \
1911 case 2: AV_WN16A(&var, val * 0x0101); break; \
1912 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1913 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1915 uint64_t v64 = val * 0x0101010101010101ULL; \
1916 AV_WN64A( &var, v64); \
1917 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1922 #define SPLAT_CTX(var, val, n) \
1924 case 1: var = val; break; \
1925 case 2: AV_WN16A(&var, val * 0x0101); break; \
1926 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1928 uint32_t v32 = val * 0x01010101; \
1929 AV_WN32A( &var, v32); \
1930 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1934 uint32_t v32 = val * 0x01010101; \
1935 AV_WN32A( &var, v32); \
1936 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1937 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1938 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1945 #define SET_CTXS(dir, off, n) \
1947 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1948 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1949 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1950 if (!s->keyframe && !s->intraonly) { \
1951 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1952 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1953 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1955 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1956 if (s->filtermode == FILTER_SWITCHABLE) { \
1957 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1962 case 1:
SET_CTXS(above, col, 1);
break;
1963 case 2:
SET_CTXS(above, col, 2);
break;
1964 case 4:
SET_CTXS(above, col, 4);
break;
1965 case 8:
SET_CTXS(above, col, 8);
break;
1968 case 1:
SET_CTXS(left, row7, 1);
break;
1969 case 2:
SET_CTXS(left, row7, 2);
break;
1970 case 4:
SET_CTXS(left, row7, 4);
break;
1971 case 8:
SET_CTXS(left, row7, 8);
break;
1991 for (n = 0; n < w4 * 2; n++) {
1995 for (n = 0; n < h4 * 2; n++) {
2003 for (
y = 0;
y < h4;
y++) {
2004 int x, o = (row +
y) * s->
sb_cols * 8 + col;
2008 for (x = 0; x < w4; x++) {
2012 }
else if (b->
comp) {
2013 for (x = 0; x < w4; x++) {
2014 mv[x].ref[0] = b->
ref[0];
2015 mv[x].ref[1] = b->
ref[1];
2020 for (x = 0; x < w4; x++) {
2021 mv[x].ref[0] = b->
ref[0];
2032 int is_tx32x32,
unsigned (*cnt)[6][3],
2033 unsigned (*eob)[6][2],
uint8_t (*p)[6][11],
2034 int nnz,
const int16_t *scan,
const int16_t (*nb)[2],
2035 const int16_t *band_counts,
const int16_t *qmul)
2037 int i = 0,
band = 0, band_left = band_counts[
band];
2051 cnt[
band][nnz][0]++;
2053 band_left = band_counts[++
band];
2055 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2057 if (++i == n_coeffs)
2064 cnt[
band][nnz][1]++;
2072 cnt[
band][nnz][2]++;
2075 cache[rc] = val = 2;
2126 band_left = band_counts[++
band];
2131 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2133 }
while (++i < n_coeffs);
2139 unsigned (*cnt)[6][3],
unsigned (*eob)[6][2],
2140 uint8_t (*p)[6][11],
int nnz,
const int16_t *scan,
2141 const int16_t (*nb)[2],
const int16_t *band_counts,
2142 const int16_t *qmul)
2145 nnz, scan, nb, band_counts, qmul);
2149 unsigned (*cnt)[6][3],
unsigned (*eob)[6][2],
2150 uint8_t (*p)[6][11],
int nnz,
const int16_t *scan,
2151 const int16_t (*nb)[2],
const int16_t *band_counts,
2152 const int16_t *qmul)
2155 nnz, scan, nb, band_counts, qmul);
2162 int row = s->
row, col = s->
col;
2167 int end_x =
FFMIN(2 * (s->
cols - col), w4);
2168 int end_y =
FFMIN(2 * (s->
rows - row), h4);
2169 int n, pl, x,
y, res;
2172 const int16_t *
const *yscans =
vp9_scans[tx];
2178 static const int16_t band_counts[4][8] = {
2179 { 1, 2, 3, 4, 3, 16 - 13 },
2180 { 1, 2, 3, 4, 11, 64 - 21 },
2181 { 1, 2, 3, 4, 11, 256 - 21 },
2182 { 1, 2, 3, 4, 11, 1024 - 21 },
2184 const int16_t *y_band_counts = band_counts[b->tx];
2185 const int16_t *uv_band_counts = band_counts[b->
uvtx];
2187 #define MERGE(la, end, step, rd) \
2188 for (n = 0; n < end; n += step) \
2189 la[n] = !!rd(&la[n])
2190 #define MERGE_CTX(step, rd) \
2192 MERGE(l, end_y, step, rd); \
2193 MERGE(a, end_x, step, rd); \
2196 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2197 for (n = 0, y = 0; y < end_y; y += step) { \
2198 for (x = 0; x < end_x; x += step, n += step * step) { \
2199 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2200 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2201 c, e, p, a[x] + l[y], yscans[txtp], \
2202 ynbs[txtp], y_band_counts, qmul[0]); \
2203 a[x] = l[y] = !!res; \
2205 AV_WN16A(&s->eob[n], res); \
2212 #define SPLAT(la, end, step, cond) \
2214 for (n = 1; n < end; n += step) \
2215 la[n] = la[n - 1]; \
2216 } else if (step == 4) { \
2218 for (n = 0; n < end; n += step) \
2219 AV_WN32A(&la[n], la[n] * 0x01010101); \
2221 for (n = 0; n < end; n += step) \
2222 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2226 if (HAVE_FAST_64BIT) { \
2227 for (n = 0; n < end; n += step) \
2228 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2230 for (n = 0; n < end; n += step) { \
2231 uint32_t v32 = la[n] * 0x01010101; \
2232 AV_WN32A(&la[n], v32); \
2233 AV_WN32A(&la[n + 4], v32); \
2237 for (n = 0; n < end; n += step) \
2238 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2241 #define SPLAT_CTX(step) \
2243 SPLAT(a, end_x, step, end_x == w4); \
2244 SPLAT(l, end_y, step, end_y == h4); \
2269 #define DECODE_UV_COEF_LOOP(step) \
2270 for (n = 0, y = 0; y < end_y; y += step) { \
2271 for (x = 0; x < end_x; x += step, n += step * step) { \
2272 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2273 16 * step * step, c, e, p, a[x] + l[y], \
2274 uvscan, uvnb, uv_band_counts, qmul[1]); \
2275 a[x] = l[y] = !!res; \
2277 AV_WN16A(&s->uveob[pl][n], res); \
2279 s->uveob[pl][n] = res; \
2291 for (pl = 0; pl < 2; pl++) {
2313 1024, c, e, p, a[0] + l[0],
2314 uvscan, uvnb, uv_band_counts, qmul[1]);
2315 a[0] = l[0] = !!res;
2324 uint8_t *dst_edge, ptrdiff_t stride_edge,
2325 uint8_t *dst_inner, ptrdiff_t stride_inner,
2326 uint8_t *l,
int col,
int x,
int w,
2330 int have_top = row > 0 || y > 0;
2332 int have_right = x < w - 1;
2333 static const uint8_t mode_conv[10][2 ][2 ] = {
2355 static const struct {
2364 [
DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2367 [
VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2368 [
HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2370 [
HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2371 [
TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2380 mode = mode_conv[
mode][have_left][have_top];
2381 if (edges[mode].needs_top) {
2383 int n_px_need = 4 << tx, n_px_have = (((s->
cols - col) << !p) - x) * 4;
2384 int n_px_need_tr = 0;
2386 if (tx ==
TX_4X4 && edges[mode].needs_topright && have_right)
2393 top = !(row & 7) && !y ?
2395 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2397 topleft = !(row & 7) && !y ?
2399 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2400 &dst_inner[-stride_inner];
2404 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2405 (tx !=
TX_4X4 || !edges[mode].needs_topright || have_right) &&
2406 n_px_need + n_px_need_tr <= n_px_have) {
2410 if (n_px_need <= n_px_have) {
2411 memcpy(*a, top, n_px_need);
2413 memcpy(*a, top, n_px_have);
2414 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2415 n_px_need - n_px_have);
2418 memset(*a, 127, n_px_need);
2420 if (edges[mode].needs_topleft) {
2421 if (have_left && have_top) {
2422 (*a)[-1] = topleft[-1];
2424 (*a)[-1] = have_top ? 129 : 127;
2427 if (tx ==
TX_4X4 && edges[mode].needs_topright) {
2428 if (have_top && have_right &&
2429 n_px_need + n_px_need_tr <= n_px_have) {
2430 memcpy(&(*a)[4], &top[4], 4);
2432 memset(&(*a)[4], (*a)[3], 4);
2437 if (edges[mode].needs_left) {
2439 int n_px_need = 4 << tx, i, n_px_have = (((s->
rows - row) << !p) -
y) * 4;
2440 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2441 ptrdiff_t
stride = x == 0 ? stride_edge : stride_inner;
2443 if (edges[mode].invert_left) {
2444 if (n_px_need <= n_px_have) {
2445 for (i = 0; i < n_px_need; i++)
2446 l[i] = dst[i * stride - 1];
2448 for (i = 0; i < n_px_have; i++)
2449 l[i] = dst[i * stride - 1];
2450 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2453 if (n_px_need <= n_px_have) {
2454 for (i = 0; i < n_px_need; i++)
2455 l[n_px_need - 1 - i] = dst[i * stride - 1];
2457 for (i = 0; i < n_px_have; i++)
2458 l[n_px_need - 1 - i] = dst[i * stride - 1];
2459 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2463 memset(l, 129, 4 << tx);
2474 int row = s->
row, col = s->
col;
2475 int w4 =
bwh_tab[1][b->
bs][0] << 1, step1d = 1 << b->tx,
n;
2476 int h4 =
bwh_tab[1][b->
bs][1] << 1, x,
y, step = 1 << (b->tx * 2);
2477 int end_x =
FFMIN(2 * (s->
cols - col), w4);
2478 int end_y =
FFMIN(2 * (s->
rows - row), h4);
2480 int uvstep1d = 1 << b->
uvtx, p;
2485 for (
n = 0, y = 0; y < end_y; y += step1d) {
2486 uint8_t *ptr = dst, *ptr_r = dst_r;
2487 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2488 ptr_r += 4 * step1d,
n += step) {
2498 col, x, w4, row, y, b->tx, 0);
2512 step = 1 << (b->
uvtx * 2);
2513 for (p = 0; p < 2; p++) {
2514 dst = s->
dst[1 + p];
2516 for (
n = 0, y = 0; y < end_y; y += uvstep1d) {
2517 uint8_t *ptr = dst, *ptr_r = dst_r;
2518 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2519 ptr_r += 4 * uvstep1d,
n += step) {
2527 col, x, w4, row, y, b->
uvtx, p + 1);
2540 uint8_t *dst, ptrdiff_t dst_stride,
2541 const uint8_t *ref, ptrdiff_t ref_stride,
2543 ptrdiff_t
y, ptrdiff_t x,
const VP56mv *
mv,
2544 int bw,
int bh,
int w,
int h)
2546 int mx = mv->
x, my = mv->
y,
th;
2550 ref += y * ref_stride + x;
2556 th = (y + bh + 4 * !!my + 7) >> 6;
2558 if (x < !!mx * 3 || y < !!my * 3 ||
2559 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2561 ref - !!my * 3 * ref_stride - !!mx * 3,
2563 bw + !!mx * 7, bh + !!my * 7,
2564 x - !!mx * 3, y - !!my * 3, w, h);
2568 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2573 ptrdiff_t dst_stride,
2574 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2575 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2577 ptrdiff_t
y, ptrdiff_t x,
const VP56mv *
mv,
2578 int bw,
int bh,
int w,
int h)
2580 int mx = mv->
x, my = mv->
y,
th;
2584 ref_u += y * src_stride_u + x;
2585 ref_v += y * src_stride_v + x;
2591 th = (y + bh + 4 * !!my + 7) >> 5;
2593 if (x < !!mx * 3 || y < !!my * 3 ||
2594 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2596 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2598 bw + !!mx * 7, bh + !!my * 7,
2599 x - !!mx * 3, y - !!my * 3, w, h);
2601 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2604 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2606 bw + !!mx * 7, bh + !!my * 7,
2607 x - !!mx * 3, y - !!my * 3, w, h);
2609 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2611 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2612 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2619 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2620 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2624 int row = s->
row, col = s->
col;
2642 row << 3, col << 3, &b->
mv[0][0], 8, 4, w1, h1);
2644 s->
dst[0] + 4 * ls_y, ls_y,
2646 (row << 3) + 4, col << 3, &b->
mv[2][0], 8, 4, w1, h1);
2650 ref2->data[0], ref2->linesize[0], tref2,
2651 row << 3, col << 3, &b->
mv[0][1], 8, 4, w2, h2);
2653 s->
dst[0] + 4 * ls_y, ls_y,
2654 ref2->data[0], ref2->linesize[0], tref2,
2655 (row << 3) + 4, col << 3, &b->
mv[2][1], 8, 4, w2, h2);
2660 row << 3, col << 3, &b->
mv[0][0], 4, 8, w1, h1);
2663 row << 3, (col << 3) + 4, &b->
mv[1][0], 4, 8, w1, h1);
2667 ref2->data[0], ref2->linesize[0], tref2,
2668 row << 3, col << 3, &b->
mv[0][1], 4, 8, w2, h2);
2670 ref2->data[0], ref2->linesize[0], tref2,
2671 row << 3, (col << 3) + 4, &b->
mv[1][1], 4, 8, w2, h2);
2680 row << 3, col << 3, &b->
mv[0][0], 4, 4, w1, h1);
2683 row << 3, (col << 3) + 4, &b->
mv[1][0], 4, 4, w1, h1);
2685 s->
dst[0] + 4 * ls_y, ls_y,
2687 (row << 3) + 4, col << 3, &b->
mv[2][0], 4, 4, w1, h1);
2689 s->
dst[0] + 4 * ls_y + 4, ls_y,
2691 (row << 3) + 4, (col << 3) + 4, &b->
mv[3][0], 4, 4, w1, h1);
2695 ref2->data[0], ref2->linesize[0], tref2,
2696 row << 3, col << 3, &b->
mv[0][1], 4, 4, w2, h2);
2698 ref2->data[0], ref2->linesize[0], tref2,
2699 row << 3, (col << 3) + 4, &b->
mv[1][1], 4, 4, w2, h2);
2701 s->
dst[0] + 4 * ls_y, ls_y,
2702 ref2->data[0], ref2->linesize[0], tref2,
2703 (row << 3) + 4, col << 3, &b->
mv[2][1], 4, 4, w2, h2);
2705 s->
dst[0] + 4 * ls_y + 4, ls_y,
2706 ref2->data[0], ref2->linesize[0], tref2,
2707 (row << 3) + 4, (col << 3) + 4, &b->
mv[3][1], 4, 4, w2, h2);
2711 int bwl = bwlog_tab[0][b->
bs];
2716 row << 3, col << 3, &b->
mv[0][0],bw, bh, w1, h1);
2720 ref2->data[0], ref2->linesize[0], tref2,
2721 row << 3, col << 3, &b->
mv[0][1], bw, bh, w2, h2);
2726 int bwl = bwlog_tab[1][b->
bs];
2744 s->
dst[1], s->
dst[2], ls_uv,
2747 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2757 s->
dst[1], s->
dst[2], ls_uv,
2758 ref2->data[1], ref2->linesize[1],
2759 ref2->data[2], ref2->linesize[2], tref2,
2760 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2767 int w4 =
bwh_tab[1][b->
bs][0] << 1, step1d = 1 << b->tx,
n;
2768 int h4 =
bwh_tab[1][b->
bs][1] << 1, x,
y, step = 1 << (b->tx * 2);
2769 int end_x =
FFMIN(2 * (s->
cols - col), w4);
2770 int end_y =
FFMIN(2 * (s->
rows - row), h4);
2772 int uvstep1d = 1 << b->
uvtx, p;
2776 for (
n = 0, y = 0; y < end_y; y += step1d) {
2778 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
n += step) {
2791 step = 1 << (b->
uvtx * 2);
2792 for (p = 0; p < 2; p++) {
2793 dst = s->
dst[p + 1];
2794 for (
n = 0, y = 0; y < end_y; y += uvstep1d) {
2796 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
n += step) {
2810 int row_and_7,
int col_and_7,
2811 int w,
int h,
int col_end,
int row_end,
2824 if (tx ==
TX_4X4 && is_uv) {
2839 if (tx ==
TX_4X4 && !skip_inter) {
2840 int t = 1 << col_and_7, m_col = (t << w) - t,
y;
2841 int m_col_odd = (t << (w - 1)) - t;
2845 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2847 for (
y = row_and_7;
y < h + row_and_7;
y++) {
2848 int col_mask_id = 2 - !(
y & 7);
2850 lflvl->
mask[is_uv][0][
y][1] |= m_row_8;
2851 lflvl->
mask[is_uv][0][
y][2] |= m_row_4;
2862 if ((col_end & 1) && (
y & 1)) {
2863 lflvl->
mask[is_uv][1][
y][col_mask_id] |= m_col_odd;
2865 lflvl->
mask[is_uv][1][
y][col_mask_id] |= m_col;
2869 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2871 for (
y = row_and_7;
y < h + row_and_7;
y++) {
2872 int col_mask_id = 2 - !(
y & 3);
2874 lflvl->
mask[is_uv][0][
y][1] |= m_row_8;
2875 lflvl->
mask[is_uv][0][
y][2] |= m_row_4;
2876 lflvl->
mask[is_uv][1][
y][col_mask_id] |= m_col;
2877 lflvl->
mask[is_uv][0][
y][3] |= m_col;
2878 lflvl->
mask[is_uv][1][
y][3] |= m_col;
2882 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2885 int mask_id = (tx ==
TX_8X8);
2886 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2887 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2888 int m_row = m_col & masks[l2];
2892 if (is_uv && tx >
TX_8X8 && (w ^ (w - 1)) == 1) {
2893 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2894 int m_row_8 = m_row - m_row_16;
2896 for (y = row_and_7; y < h + row_and_7; y++) {
2897 lflvl->
mask[is_uv][0][
y][0] |= m_row_16;
2898 lflvl->
mask[is_uv][0][
y][1] |= m_row_8;
2901 for (y = row_and_7; y < h + row_and_7; y++)
2902 lflvl->
mask[is_uv][0][y][mask_id] |= m_row;
2905 if (is_uv && tx >
TX_8X8 && (h ^ (h - 1)) == 1) {
2906 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2907 lflvl->
mask[is_uv][1][y][0] |= m_col;
2908 if (y - row_and_7 == h - 1)
2909 lflvl->
mask[is_uv][1][
y][1] |= m_col;
2911 for (y = row_and_7; y < h + row_and_7; y += step1d)
2912 lflvl->
mask[is_uv][1][y][mask_id] |= m_col;
2914 }
else if (tx !=
TX_4X4) {
2917 mask_id = (tx ==
TX_8X8) || (is_uv && h == 1);
2918 lflvl->
mask[is_uv][1][row_and_7][mask_id] |= m_col;
2919 mask_id = (tx ==
TX_8X8) || (is_uv && w == 1);
2920 for (y = row_and_7; y < h + row_and_7; y++)
2921 lflvl->
mask[is_uv][0][y][mask_id] |= t;
2923 int t8 = t & 0x01,
t4 = t -
t8;
2925 for (y = row_and_7; y < h + row_and_7; y++) {
2926 lflvl->
mask[is_uv][0][
y][2] |=
t4;
2927 lflvl->
mask[is_uv][0][
y][1] |=
t8;
2929 lflvl->
mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2931 int t8 = t & 0x11,
t4 = t -
t8;
2933 for (y = row_and_7; y < h + row_and_7; y++) {
2934 lflvl->
mask[is_uv][0][
y][2] |=
t4;
2935 lflvl->
mask[is_uv][0][
y][1] |=
t8;
2937 lflvl->
mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2943 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2957 s->
min_mv.
x = -(128 + col * 64);
2958 s->
min_mv.
y = -(128 + row * 64);
2966 b->
uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2973 #define SPLAT_ZERO_CTX(v, n) \
2975 case 1: v = 0; break; \
2976 case 2: AV_ZERO16(&v); break; \
2977 case 4: AV_ZERO32(&v); break; \
2978 case 8: AV_ZERO64(&v); break; \
2979 case 16: AV_ZERO128(&v); break; \
2981 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2983 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2984 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2985 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3003 s->
block += w4 * h4 * 64;
3004 s->
uvblock[0] += w4 * h4 * 16;
3005 s->
uvblock[1] += w4 * h4 * 16;
3006 s->
eob += 4 * w4 * h4;
3007 s->
uveob[0] += w4 * h4;
3008 s->
uveob[1] += w4 * h4;
3017 emu[0] = (col + w4) * 8 > f->
linesize[0] ||
3018 (row + h4) > s->
rows;
3019 emu[1] = (col + w4) * 4 > f->
linesize[1] ||
3020 (row + h4) > s->
rows;
3025 s->
dst[0] = f->
data[0] + yoff;
3033 s->
dst[1] = f->
data[1] + uvoff;
3034 s->
dst[2] = f->
data[2] + uvoff;
3045 for (
n = 0; o < w;
n++) {
3051 s->
tmp_y + o, 64, h, 0, 0);
3059 for (
n = 1; o < w;
n++) {
3065 s->
tmp_uv[0] + o, 32, h, 0, 0);
3067 s->
tmp_uv[1] + o, 32, h, 0, 0);
3081 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3082 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3085 b->
uvtx, skip_inter);
3092 limit >>= (sharp + 3) >> 2;
3093 limit =
FFMIN(limit, 9 - sharp);
3095 limit =
FFMAX(limit, 1);
3104 s->
block += w4 * h4 * 64;
3105 s->
uvblock[0] += w4 * h4 * 16;
3106 s->
uvblock[1] += w4 * h4 * 16;
3107 s->
eob += 4 * w4 * h4;
3108 s->
uveob[0] += w4 * h4;
3109 s->
uveob[1] += w4 * h4;
3114 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
3122 ptrdiff_t hbs = 4 >> bl;
3128 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3129 }
else if (col + hbs < s->cols) {
3130 if (row + hbs < s->rows) {
3134 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3137 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3138 yoff += hbs * 8 * y_stride;
3139 uvoff += hbs * 4 * uv_stride;
3140 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3143 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3146 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3149 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3151 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3152 yoff += hbs * 8 * y_stride;
3153 uvoff += hbs * 4 * uv_stride;
3154 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3155 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3156 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3163 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3165 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3168 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3170 }
else if (row + hbs < s->rows) {
3173 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3174 yoff += hbs * 8 * y_stride;
3175 uvoff += hbs * 4 * uv_stride;
3176 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3179 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3183 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3189 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
3193 ptrdiff_t hbs = 4 >> bl;
3199 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->
bl, b->
bp);
3200 }
else if (s->
b->
bl == bl) {
3201 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->
bl, b->
bp);
3203 yoff += hbs * 8 * y_stride;
3204 uvoff += hbs * 4 * uv_stride;
3205 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->
bl, b->
bp);
3209 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->
bl, b->
bp);
3213 if (col + hbs < s->cols) {
3214 if (row + hbs < s->rows) {
3216 uvoff + 4 * hbs, bl + 1);
3217 yoff += hbs * 8 * y_stride;
3218 uvoff += hbs * 4 * uv_stride;
3219 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3221 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3225 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3227 }
else if (row + hbs < s->rows) {
3228 yoff += hbs * 8 * y_stride;
3229 uvoff += hbs * 4 * uv_stride;
3230 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3236 int row,
int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3251 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3252 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->
mask[0][0][
y];
3254 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3255 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3256 unsigned hm = hm1 | hm2 | hm13 | hm23;
3258 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3260 int L = *l,
H = L >> 4;
3264 if (hmask1[0] & x) {
3265 if (hmask2[0] & x) {
3271 }
else if (hm2 & x) {
3278 [0](ptr, ls_y,
E, I,
H);
3281 [0](ptr, ls_y, E, I, H);
3284 }
else if (hm2 & x) {
3285 int L = l[8],
H = L >> 4;
3290 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3294 int L = *l,
H = L >> 4;
3306 }
else if (hm23 & x) {
3307 int L = l[8],
H = L >> 4;
3318 dst = f->
data[0] + yoff;
3320 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3321 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->
mask[0][1][
y];
3322 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3324 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3327 int L = *l,
H = L >> 4;
3331 if (vmask[0] & (x << 1)) {
3337 }
else if (vm & (x << 1)) {
3343 [!!(vmask[1] & (x << 1))]
3344 [1](ptr, ls_y, E, I, H);
3347 [1](ptr, ls_y, E, I, H);
3349 }
else if (vm & (x << 1)) {
3350 int L = l[1],
H = L >> 4;
3354 [1](ptr + 8, ls_y,
E, I,
H);
3358 int L = *l,
H = L >> 4;
3361 if (vm3 & (x << 1)) {
3370 }
else if (vm3 & (x << 1)) {
3371 int L = l[1],
H = L >> 4;
3380 for (p = 0; p < 2; p++) {
3382 dst = f->
data[1 + p] + uvoff;
3383 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3384 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->
mask[1][0][
y];
3386 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3387 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3389 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3392 int L = *l,
H = L >> 4;
3395 if (hmask1[0] & x) {
3396 if (hmask2[0] & x) {
3402 }
else if (hm2 & x) {
3409 [0](ptr, ls_uv,
E, I,
H);
3412 [0](ptr, ls_uv, E, I, H);
3414 }
else if (hm2 & x) {
3415 int L = l[16],
H = L >> 4;
3419 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3427 dst = f->
data[1 + p] + uvoff;
3428 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3429 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->
mask[1][1][
y];
3430 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3432 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3435 int L = *l,
H = L >> 4;
3439 if (vmask[0] & (x << 2)) {
3445 }
else if (vm & (x << 2)) {
3451 [!!(vmask[1] & (x << 2))]
3452 [1](ptr, ls_uv, E, I, H);
3455 [1](ptr, ls_uv, E, I, H);
3457 }
else if (vm & (x << 2)) {
3458 int L = l[2],
H = L >> 4;
3462 [1](ptr + 8, ls_uv,
E, I,
H);
3474 int sb_start = ( idx *
n) >> log2_n;
3475 int sb_end = ((idx + 1) * n) >> log2_n;
3476 *start =
FFMIN(sb_start, n) << 3;
3477 *end =
FFMIN(sb_end, n) << 3;
3481 int max_count,
int update_factor)
3483 unsigned ct = ct0 + ct1, p2, p1;
3489 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3490 p2 = av_clip(p2, 1, 255);
3491 ct =
FFMIN(ct, max_count);
3492 update_factor =
FASTDIV(update_factor * ct, max_count);
3495 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3505 for (i = 0; i < 4; i++)
3506 for (j = 0; j < 2; j++)
3507 for (k = 0; k < 2; k++)
3508 for (l = 0; l < 6; l++)
3509 for (m = 0; m < 6; m++) {
3514 if (l == 0 && m >= 3)
3518 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3531 for (i = 0; i < 3; i++)
3535 for (i = 0; i < 4; i++)
3540 for (i = 0; i < 5; i++)
3546 for (i = 0; i < 5; i++)
3552 for (i = 0; i < 5; i++) {
3556 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3557 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3562 for (i = 0; i < 4; i++)
3563 for (j = 0; j < 4; j++) {
3567 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3568 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3574 for (i = 0; i < 2; i++) {
3580 adapt_prob(&p->
tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3588 for (i = 0; i < 4; i++) {
3592 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3598 for (i = 0; i < 7; i++) {
3602 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3603 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3612 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3613 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3618 for (i = 0; i < 2; i++) {
3620 unsigned *
c, (*c2)[2], sum;
3627 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3632 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3635 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3639 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3647 for (j = 0; j < 10; j++)
3648 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3650 for (j = 0; j < 2; j++) {
3653 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3654 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3659 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3660 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3672 for (i = 0; i < 4; i++) {
3676 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3696 for (i = 0; i < 10; i++) {
3700 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3732 for (i = 0; i < 2; i++) {
3737 for (i = 0; i < 8; i++) {
3759 int res, tile_row, tile_col, i, ref, row, col;
3760 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3765 }
else if (res == 0) {
3794 for (i = 0; i < 8; i++) {
3834 "Failed to allocate block buffers\n");
3840 for (i = 0; i < 4; i++) {
3841 for (j = 0; j < 2; j++)
3842 for (k = 0; k < 2; k++)
3843 for (l = 0; l < 6; l++)
3844 for (m = 0; m < 6; m++)
3881 if (tile_size > size) {
3896 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3898 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3916 memcpy(&s->
c, &s->
c_b[tile_col],
sizeof(s->
c));
3920 col < s->tiling.tile_col_end;
3921 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3925 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
3937 memcpy(&s->
c_b[tile_col], &s->
c,
sizeof(s->
c));
3947 if (row + 8 < s->
rows) {
3949 f->
data[0] + yoff + 63 * ls_y,
3952 f->
data[1] + uvoff + 31 * ls_uv,
3955 f->
data[2] + uvoff + 31 * ls_uv,
3963 lflvl_ptr = s->
lflvl;
3964 for (col = 0; col < s->
cols;
3965 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3981 }
while (s->
pass++ == 1);
3985 for (i = 0; i < 8; i++) {
4005 for (i = 0; i < 2; i++)
4007 for (i = 0; i < 8; i++)
4016 for (i = 0; i < 2; i++) {
4024 for (i = 0; i < 8; i++) {
4062 (!ssrc->intra_pred_data[0] || s->
cols != ssrc->cols || s->
rows != ssrc->rows)) {
4066 for (i = 0; i < 2; i++) {
4069 if (ssrc->frames[i].tf.f->data[0]) {
4074 for (i = 0; i < 8; i++) {
4077 if (ssrc->next_refs[i].f->data[0]) {
4088 if (ssrc->segmentation.enabled) {