#define VP9_SYNCCODE 0x498342
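/* Per-frame symbol counts (apparently part of a counts struct), indexed by
 * [tx size][plane type][intra/inter][coef band][context][bin]; they feed the
 * backward probability adaptation in adapt_probs() further down. */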
unsigned coef[4][2][2][6][6][3];
unsigned eob[4][2][2][6][6][2];
{ 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
{ 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
{ 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
{ 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
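/* assign() carves typed, per-superblock-column context arrays out of what is
 * presumably one shared allocation; 'p' is the running byte pointer. */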
return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
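/* update_prob(): probability deltas are coded through a remapping so that
 * small (likelier) updates get shorter codes; inv_map_table undoes that
 * remapping before inv_recenter_nonneg() (above) re-centers the decoded
 * value around the current probability. */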
static const int inv_map_table[254] = {
    7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
    189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
    86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
    116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
    131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
    146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
    222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
    237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
for (i = 0; i < 4; i++)
for (i = 0; i < 2; i++)
for (i = 0; i < 7; i++)
for (i = 0; i < 3; i++)
"Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
for (i = 0; i < 8; i++) {
int qyac, qydc, quvac, quvdc, lflvl, sh;
qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
qyac = av_clip_uintp2(qyac, 8);
av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
for (j = 1; j < 4; j++) {
for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
max = FFMAX(0, max - 1);
if (size2 > size - (data2 - data)) {
for (i = 0; i < 2; i++)
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
for (i = 0; i < 2; i++)
for (j = 0; j < 3; j++)
for (i = 0; i < 4; i++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++) {
if (m >= 3 && l == 0)
for (n = 0; n < 3; n++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++) {
for (i = 0; i < 3; i++)
for (i = 0; i < 7; i++)
for (j = 0; j < 3; j++)
for (i = 0; i < 4; i++)
for (j = 0; j < 2; j++)
for (i = 0; i < 4; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++) {
for (i = 0; i < 5; i++)
for (i = 0; i < 4; i++)
for (j = 0; j < 9; j++)
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
for (k = 0; k < 3; k++)
for (i = 0; i < 3; i++)
for (i = 0; i < 2; i++) {
for (j = 0; j < 10; j++)
for (j = 0; j < 10; j++)
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 3; k++)
for (j = 0; j < 3; j++)
for (i = 0; i < 2; i++) {
return (data2 - data) + size2;
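/* find_ref_mvs(): scan up to 8 neighbouring blocks (plus the temporal
 * co-located MV) for a vector that uses the requested reference frame. The
 * offsets below are per block size, in 8x8-block units, ordered so the most
 * predictive neighbours are tried first. */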
VP56mv *pmv, int ref, int z, int idx, int sb)
static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
    [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
                  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
    [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
    [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
                  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
    [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
                  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
                  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
    [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
                 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
    [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
                 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
    [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
int row = s->row, col = s->col, row7 = s->row7;
const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
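/* MVs are compared as packed 32-bit words via AV_RN32A(); 0x80008000 should
 * not occur as a real clamped vector, so it doubles as the "unset" marker. */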
#define RETURN_DIRECT_MV(mv) \
    uint32_t m = AV_RN32A(&mv); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \
if (sb == 2 || sb == 1) {
} else if (sb == 3) {
#define RETURN_MV(mv) \
    clamp_mv(&tmp, &mv, s); \
    m = AV_RN32A(&tmp); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \
    uint32_t m = AV_RN32A(&mv); \
    clamp_mv(pmv, &mv, s); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \
    clamp_mv(pmv, &mv, s); \
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
for (; i < 8; i++) {
int c = p[i][0] + col, r = p[i][1] + row;
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {
#define RETURN_SCALE_MV(mv, scale) \
    VP56mv mv_temp = { -mv.x, -mv.y }; \
    RETURN_MV(mv_temp); \
for (i = 0; i < 8; i++) {
int c = p[i][0] + col, r = p[i][1] + row;
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
#undef RETURN_SCALE_MV
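/* read_mv_component() (fragments below): a component is coded as a sign bit,
 * a magnitude class, class-dependent offset bits and a fractional (1/8-pel)
 * part; the final return rebuilds the signed magnitude. */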
for (n = 0, m = 0; m < c; m++) {
n = (n << 3) | (bit << 1);
return sign ? -(n + 1) : (n + 1);
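/* fill_mv(): for each of up to two references, find_ref_mvs() supplies the
 * prediction; only NEWMV mode additionally decodes a residual with
 * read_mv_component(). */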
mode == NEWMV ? -1 : sb);
if ((mode == NEWMV || sb == -1) &&
if (mode == NEWMV) {
mode == NEWMV ? -1 : sb);
if ((mode == NEWMV || sb == -1) &&
if (mode == NEWMV) {
int v16 = v * 0x0101;
uint32_t v32 = v * 0x01010101;
uint64_t v64 = v * 0x0101010101010101ULL;
uint32_t v32 = v * 0x01010101;
0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
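/* Per-block-size bit patterns written into the above/left partition context
 * (via the _ctx tables consulted by SET_CTXS below); decode_sb() reads these
 * back when choosing the partition probability. */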
int row = s->row, col = s->col, row7 = s->row7;
enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
int vref, filter_id;
for (y = 0; y < h4; y++)
    for (x = 0; x < w4; x++)
        pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
if (have_a && have_l) {
} else if (have_l) {
l[0] = a[1] = b->mode[1];
l[0] = a[1] = b->mode[1] = b->mode[0];
l[1] = a[1] = b->mode[3];
l[1] = a[1] = b->mode[3] = b->mode[2];
l[1] = a[1] = b->mode[3] = b->mode[1];
} else if (b->intra) {
static const uint8_t size_group[10] = {
    3, 3, 3, 3, 2, 2, 2, 1, 1, 1
int sz = size_group[b->bs];
static const uint8_t inter_mode_ctx_lut[14][14] = {
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
} else if (have_l) {
if (refl == refa && refa == s->varcompref[1]) {
c = (refa == refl) ? 3 : 1;
c = (refl == refa) ? 4 : 2;
} else if (have_l) {
} else if (have_l) {
b->ref[0] = 1 + bit;
3, 0, 0, 1, 0, 0, 0, 0, 0, 0
#define SPLAT_CTX(var, val, n) \
    case 1: var = val; break; \
    case 2: AV_WN16A(&var, val * 0x0101); break; \
    case 4: AV_WN32A(&var, val * 0x01010101); break; \
    case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
    uint64_t v64 = val * 0x0101010101010101ULL; \
    AV_WN64A(&var, v64); \
    AV_WN64A(&((uint8_t *) &var)[8], v64); \
#define SPLAT_CTX(var, val, n) \
    case 1: var = val; break; \
    case 2: AV_WN16A(&var, val * 0x0101); break; \
    case 4: AV_WN32A(&var, val * 0x01010101); break; \
    uint32_t v32 = val * 0x01010101; \
    AV_WN32A(&var, v32); \
    AV_WN32A(&((uint8_t *) &var)[4], v32); \
    uint32_t v32 = val * 0x01010101; \
    AV_WN32A(&var, v32); \
    AV_WN32A(&((uint8_t *) &var)[4], v32); \
    AV_WN32A(&((uint8_t *) &var)[8], v32); \
    AV_WN32A(&((uint8_t *) &var)[12], v32); \
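/* Two SPLAT_CTX variants: the first relies on fast 64-bit stores (presumably
 * under HAVE_FAST_64BIT), the second falls back to paired 32-bit stores.
 * Both broadcast one byte across the n context bytes a block covers, so
 * later blocks can fetch their prediction context with a single load. */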
#define SET_CTXS(dir, off, n) \
    SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
    SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
    SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
    if (!s->keyframe && !s->intraonly) { \
        SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
        SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
        SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
        SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
        if (s->filtermode == FILTER_SWITCHABLE) { \
            SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
case 1: SET_CTXS(above, col, 1); break;
case 2: SET_CTXS(above, col, 2); break;
case 4: SET_CTXS(above, col, 4); break;
case 8: SET_CTXS(above, col, 8); break;
case 1: SET_CTXS(left, row7, 1); break;
case 2: SET_CTXS(left, row7, 2); break;
case 4: SET_CTXS(left, row7, 4); break;
case 8: SET_CTXS(left, row7, 8); break;
for (n = 0; n < w4 * 2; n++) {
for (n = 0; n < h4 * 2; n++) {
for (y = 0; y < h4; y++) {
int x, o = (row + y) * s->sb_cols * 8 + col;
for (x = 0; x < w4; x++) {
} else if (b->comp) {
for (x = 0; x < w4; x++) {
mv[x].ref[0] = b->ref[0];
mv[x].ref[1] = b->ref[1];
for (x = 0; x < w4; x++) {
mv[x].ref[0] = b->ref[0];
int is_tx32x32, unsigned (*cnt)[6][3],
unsigned (*eob)[6][2], uint8_t (*p)[6][11],
int nnz, const int16_t *scan, const int16_t (*nb)[2],
const int16_t *band_counts, const int16_t *qmul)
int i = 0, band = 0, band_left = band_counts[band];
cnt[band][nnz][0]++;
band_left = band_counts[++band];
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
if (++i == n_coeffs)
cnt[band][nnz][1]++;
cnt[band][nnz][2]++;
cache[rc] = val = 2;
band_left = band_counts[++band];
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
} while (++i < n_coeffs);
unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
nnz, scan, nb, band_counts, qmul);
unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
nnz, scan, nb, band_counts, qmul);
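/* The two thin wrappers above appear to share one generic coefficient
 * decoder and differ only in the is_tx32x32 flag: for 32x32 transforms the
 * dequantized coefficient is additionally divided by 2. */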
int row = s->row, col = s->col;
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int n, pl, x, y, res;
const int16_t *const *yscans = vp9_scans[tx];
static const int16_t band_counts[4][8] = {
    { 1, 2, 3, 4, 3, 16 - 13 },
    { 1, 2, 3, 4, 11, 64 - 21 },
    { 1, 2, 3, 4, 11, 256 - 21 },
    { 1, 2, 3, 4, 11, 1024 - 21 },
const int16_t *y_band_counts = band_counts[b->tx];
const int16_t *uv_band_counts = band_counts[b->uvtx];
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    MERGE(l, end_y, step, rd); \
    MERGE(a, end_x, step, rd); \
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
                                     c, e, p, a[x] + l[y], yscans[txtp], \
                                     ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            AV_WN16A(&s->eob[n], res); \
#define SPLAT(la, end, step, cond) \
    for (n = 1; n < end; n += step) \
        la[n] = la[n - 1]; \
} else if (step == 4) { \
    for (n = 0; n < end; n += step) \
        AV_WN32A(&la[n], la[n] * 0x01010101); \
    for (n = 0; n < end; n += step) \
        memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
    if (HAVE_FAST_64BIT) { \
        for (n = 0; n < end; n += step) \
            AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
        for (n = 0; n < end; n += step) { \
            uint32_t v32 = la[n] * 0x01010101; \
            AV_WN32A(&la[n], v32); \
            AV_WN32A(&la[n + 4], v32); \
    for (n = 0; n < end; n += step) \
        memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
#define SPLAT_CTX(step) \
    SPLAT(a, end_x, step, end_x == w4); \
    SPLAT(l, end_y, step, end_y == h4); \
#define DECODE_UV_COEF_LOOP(step) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
                                  16 * step * step, c, e, p, a[x] + l[y], \
                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            s->uveob[pl][n] = res; \
for (pl = 0; pl < 2; pl++) {
1024, c, e, p, a[0] + l[0],
uvscan, uvnb, uv_band_counts, qmul[1]);
a[0] = l[0] = !!res;
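/* check_intra_mode(): assemble the top/left edge pixels needed by the intra
 * predictor. Edges outside the visible frame are synthesized with the base
 * values seen below (127 for a missing top row, 129 for a missing left
 * column), and top-right pixels are replicated when unavailable. */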
uint8_t *dst_edge, ptrdiff_t stride_edge,
uint8_t *dst_inner, ptrdiff_t stride_inner,
uint8_t *l, int col, int x, int w,
int have_top = row > 0 || y > 0;
int have_right = x < w - 1;
static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
static const struct {
[DC_PRED] = { .needs_top = 1, .needs_left = 1 },
[VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
[HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
[TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
mode = mode_conv[mode][have_left][have_top];
if (edges[mode].needs_top) {
int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
int n_px_need_tr = 0;
if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
top = !(row & 7) && !y ?
    y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
topleft = !(row & 7) && !y ?
    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                       &dst_inner[-stride_inner];
(!edges[mode].needs_topleft || (have_left && top == topleft)) &&
(tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
n_px_need + n_px_need_tr <= n_px_have) {
if (n_px_need <= n_px_have) {
memcpy(*a, top, n_px_need);
memcpy(*a, top, n_px_have);
memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
       n_px_need - n_px_have);
memset(*a, 127, n_px_need);
if (edges[mode].needs_topleft) {
if (have_left && have_top) {
(*a)[-1] = topleft[-1];
(*a)[-1] = have_top ? 129 : 127;
if (tx == TX_4X4 && edges[mode].needs_topright) {
if (have_top && have_right &&
    n_px_need + n_px_need_tr <= n_px_have) {
memcpy(&(*a)[4], &top[4], 4);
memset(&(*a)[4], (*a)[3], 4);
if (edges[mode].needs_left) {
int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
uint8_t *dst = x == 0 ? dst_edge : dst_inner;
ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
if (n_px_need <= n_px_have) {
for (i = 0; i < n_px_need; i++)
    l[n_px_need - 1 - i] = dst[i * stride - 1];
for (i = 0; i < n_px_have; i++)
    l[n_px_need - 1 - i] = dst[i * stride - 1];
memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
memset(l, 129, 4 << tx);
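/* intra_recon(): walk the block in transform-size steps, predict each
 * transform block from its assembled edges, then add the residual for
 * blocks with a non-zero eob; the second loop repeats this for both chroma
 * planes at the chroma transform size. */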
int row = s->row, col = s->col;
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int uvstep1d = 1 << b->uvtx, p;
for (n = 0, y = 0; y < end_y; y += step1d) {
uint8_t *ptr = dst, *ptr_r = dst_r;
for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
     ptr_r += 4 * step1d, n += step) {
col, x, w4, row, y, b->tx, 0);
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[1 + p];
for (n = 0, y = 0; y < end_y; y += uvstep1d) {
uint8_t *ptr = dst, *ptr_r = dst_r;
for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
     ptr_r += 4 * uvstep1d, n += step) {
col, x, w4, row, y, b->uvtx, p + 1);
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h)
int mx = mv->x, my = mv->y, th;
ref += y * ref_stride + x;
th = (y + bh + 4 * !!my + 7) >> 6;
if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
ref - !!my * 3 * ref_stride - !!mx * 3,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
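/* mc_luma_dir(): 'th' is the last reference row needed, in 64-pixel units,
 * presumably for waiting on the reference frame's decoding progress. The
 * 8-tap subpel filters read 3 pixels before and 4 after the block, so MVs
 * reaching outside the frame go through emulated_edge_mc(); mx/my are
 * doubled because luma MVs are in 1/8-pel units while the filter phases
 * appear to be indexed in 1/16-pel units. */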
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h)
int mx = mv->x, my = mv->y, th;
ref_u += y * src_stride_u + x;
ref_v += y * src_stride_v + x;
th = (y + bh + 4 * !!my + 7) >> 5;
if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
ref_u - !!my * 3 * src_stride_u - !!mx * 3,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
ref_v - !!my * 3 * src_stride_v - !!mx * 3,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
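/* inter_recon(): sub-8x8 blocks apply each of their (up to four) MVs to the
 * corresponding luma sub-block separately, once per reference; larger
 * blocks take a single call per reference. Chroma is predicted in one call
 * per plane pair, using an MV (mvuv) that appears to be derived by
 * averaging and rounding the luma sub-block MVs. */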
{ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
{ 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
int row = s->row, col = s->col;
row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
s->dst[0] + 4 * ls_y, ls_y,
(row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
s->dst[0] + 4 * ls_y, ls_y,
(row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
s->dst[0] + 4 * ls_y + 4, ls_y,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
s->dst[0] + 4 * ls_y + 4, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
int bwl = bwlog_tab[0][b->bs];
row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);
ref2->data[0], ref2->linesize[0], tref2,
row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
int bwl = bwlog_tab[1][b->bs];
s->dst[1], s->dst[2], ls_uv,
row << 2, col << 2, &mvuv, bw, bh, w1, h1);
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
row << 2, col << 2, &mvuv, bw, bh, w2, h2);
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int uvstep1d = 1 << b->uvtx, p;
for (n = 0, y = 0; y < end_y; y += step1d) {
for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[p + 1];
for (n = 0, y = 0; y < end_y; y += uvstep1d) {
for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end,
if (tx == TX_4X4 && is_uv) {
if (tx == TX_4X4 && !skip_inter) {
int t = 1 << col_and_7, m_col = (t << w) - t, y;
int m_col_odd = (t << (w - 1)) - t;
int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
for (y = row_and_7; y < h + row_and_7; y++) {
int col_mask_id = 2 - !(y & 7);
lflvl->mask[is_uv][0][y][1] |= m_row_8;
lflvl->mask[is_uv][0][y][2] |= m_row_4;
if ((col_end & 1) && (y & 1)) {
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
for (y = row_and_7; y < h + row_and_7; y++) {
int col_mask_id = 2 - !(y & 3);
lflvl->mask[is_uv][0][y][1] |= m_row_8;
lflvl->mask[is_uv][0][y][2] |= m_row_4;
lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
lflvl->mask[is_uv][0][y][3] |= m_col;
lflvl->mask[is_uv][1][y][3] |= m_col;
int y, t = 1 << col_and_7, m_col = (t << w) - t;
int mask_id = (tx == TX_8X8);
int l2 = tx + is_uv - 1, step1d = 1 << l2;
static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
int m_row = m_col & masks[l2];
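/* Loop-filter mask bookkeeping: lflvl->mask[plane][0] marks vertical edges
 * (filtered first) and [plane][1] horizontal edges, with one 8-pixel row of
 * the 64x64 superblock per 'y' index. The final index appears to select the
 * filter width (0 = 16px, 1 = 8px, 2 = 4px, 3 = inner 4x4 edges), and
 * masks[] thins the row/column bits so that only edges on the
 * transform-size grid get filtered. */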
if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
int m_row_8 = m_row - m_row_16;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][0] |= m_row_16;
lflvl->mask[is_uv][0][y][1] |= m_row_8;
for (y = row_and_7; y < h + row_and_7; y++)
lflvl->mask[is_uv][0][y][mask_id] |= m_row;
if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
lflvl->mask[is_uv][1][y][0] |= m_col;
if (y - row_and_7 == h - 1)
lflvl->mask[is_uv][1][y][1] |= m_col;
for (y = row_and_7; y < h + row_and_7; y += step1d)
lflvl->mask[is_uv][1][y][mask_id] |= m_col;
} else if (tx != TX_4X4) {
mask_id = (tx == TX_8X8) || (is_uv && h == 1);
lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
mask_id = (tx == TX_8X8) || (is_uv && w == 1);
for (y = row_and_7; y < h + row_and_7; y++)
lflvl->mask[is_uv][0][y][mask_id] |= t;
int t8 = t & 0x01, t4 = t - t8;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][2] |= t4;
lflvl->mask[is_uv][0][y][1] |= t8;
lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
int t8 = t & 0x11, t4 = t - t8;
for (y = row_and_7; y < h + row_and_7; y++) {
lflvl->mask[is_uv][0][y][2] |= t4;
lflvl->mask[is_uv][0][y][1] |= t8;
lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
s->min_mv.x = -(128 + col * 64);
s->min_mv.y = -(128 + row * 64);
b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
#define SPLAT_ZERO_CTX(v, n) \
    case 1: v = 0; break; \
    case 2: AV_ZERO16(&v); break; \
    case 4: AV_ZERO32(&v); break; \
    case 8: AV_ZERO64(&v); break; \
    case 16: AV_ZERO128(&v); break; \
#define SPLAT_ZERO_YUV(dir, var, off, n) \
    SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
    SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
    SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
s->block += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->eob += 4 * w4 * h4;
s->uveob[0] += w4 * h4;
s->uveob[1] += w4 * h4;
emu[0] = (col + w4) * 8 > f->linesize[0] ||
         (row + h4) > s->rows;
emu[1] = (col + w4) * 4 > f->linesize[1] ||
         (row + h4) > s->rows;
s->dst[0] = f->data[0] + yoff;
s->dst[1] = f->data[1] + uvoff;
s->dst[2] = f->data[2] + uvoff;
for (n = 0; o < w; n++) {
s->tmp_y + o, 64, h, 0, 0);
for (n = 1; o < w; n++) {
s->tmp_uv[0] + o, 32, h, 0, 0);
s->tmp_uv[1] + o, 32, h, 0, 0);
mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
mask_edges(lflvl, 1, row7, col7, x_end, y_end,
           b->uvtx, skip_inter);
limit >>= (sharp + 3) >> 2;
limit = FFMIN(limit, 9 - sharp);
limit = FFMAX(limit, 1);
s->block += w4 * h4 * 64;
s->uvblock[0] += w4 * h4 * 16;
s->uvblock[1] += w4 * h4 * 16;
s->eob += 4 * w4 * h4;
s->uveob[0] += w4 * h4;
s->uveob[1] += w4 * h4;
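/* decode_sb(): recursive partition walk. hbs = 4 >> bl is the half-block
 * size in 8-pixel units at level bl; the special cases below handle blocks
 * crossing the visible frame edge, where the partition choice is partly
 * implicit and only the feasible sub-blocks are decoded. */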
ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
ptrdiff_t hbs = 4 >> bl;
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (col + hbs < s->cols) {
if (row + hbs < s->rows) {
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row + hbs, col + hbs, lflvl,
          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (row + hbs < s->rows) {
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
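/* decode_sb_mem(): second-pass twin of decode_sb(). Instead of parsing
 * partition symbols it replays the block level/partition (b->bl, b->bp)
 * recorded during pass 1, so reconstruction can run without re-reading the
 * bitstream. */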
ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
ptrdiff_t hbs = 4 >> bl;
decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (s->b->bl == bl) {
decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
if (col + hbs < s->cols) {
if (row + hbs < s->rows) {
uvoff + 4 * hbs, bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
} else if (row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
uvoff += hbs * 4 * uv_stride;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
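/* loopfilter_sb(): deblock one 64x64 superblock using the masks built in
 * mask_edges(). The per-position byte *l holds the filter level L, from
 * which H = L >> 4 and the E/I thresholds are derived; vertical edges are
 * filtered before horizontal ones, luma before the two chroma planes. */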
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
unsigned hm = hm1 | hm2 | hm13 | hm23;
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
int L = *l, H = L >> 4;
if (hmask1[0] & x) {
if (hmask2[0] & x) {
} else if (hm2 & x) {
[0](ptr, ls_y, E, I, H);
[0](ptr, ls_y, E, I, H);
} else if (hm2 & x) {
int L = l[8], H = L >> 4;
[0](ptr + 8 * ls_y, ls_y, E, I, H);
int L = *l, H = L >> 4;
} else if (hm23 & x) {
int L = l[8], H = L >> 4;
dst = f->data[0] + yoff;
for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
int L = *l, H = L >> 4;
if (vmask[0] & (x << 1)) {
} else if (vm & (x << 1)) {
[!!(vmask[1] & (x << 1))]
[1](ptr, ls_y, E, I, H);
[1](ptr, ls_y, E, I, H);
} else if (vm & (x << 1)) {
int L = l[1], H = L >> 4;
[1](ptr + 8, ls_y, E, I, H);
int L = *l, H = L >> 4;
if (vm3 & (x << 1)) {
} else if (vm3 & (x << 1)) {
int L = l[1], H = L >> 4;
for (p = 0; p < 2; p++) {
dst = f->data[1 + p] + uvoff;
for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
int L = *l, H = L >> 4;
if (hmask1[0] & x) {
if (hmask2[0] & x) {
} else if (hm2 & x) {
[0](ptr, ls_uv, E, I, H);
[0](ptr, ls_uv, E, I, H);
} else if (hm2 & x) {
int L = l[16], H = L >> 4;
[0](ptr + 8 * ls_uv, ls_uv, E, I, H);
dst = f->data[1 + p] + uvoff;
for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
int L = *l, H = L >> 4;
if (vmask[0] & (x << 2)) {
} else if (vm & (x << 2)) {
[!!(vmask[1] & (x << 2))]
[1](ptr, ls_uv, E, I, H);
[1](ptr, ls_uv, E, I, H);
} else if (vm & (x << 2)) {
int L = l[2], H = L >> 4;
[1](ptr + 8, ls_uv, E, I, H);
int sb_start = ( idx      * n) >> log2_n;
int sb_end   = ((idx + 1) * n) >> log2_n;
*start = FFMIN(sb_start, n) << 3;
*end   = FFMIN(sb_end,   n) << 3;
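/* adapt_prob(): fold one frame's symbol counts into a probability. p2 is
 * the empirical probability (8-bit, clipped to 1..255), and p1 moves toward
 * it by update_factor/256, scaled down further when fewer than max_count
 * symbols were observed. */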
int max_count, int update_factor)
unsigned ct = ct0 + ct1, p2, p1;
p2 = ((ct0 << 8) + (ct >> 1)) / ct;
p2 = av_clip(p2, 1, 255);
ct = FFMIN(ct, max_count);
update_factor = FASTDIV(update_factor * ct, max_count);
*p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
for (i = 0; i < 4; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++) {
if (l == 0 && m >= 3)
adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
for (i = 0; i < 3; i++)
for (i = 0; i < 4; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++)
for (i = 0; i < 5; i++) {
adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++) {
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 2; i++) {
adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
for (i = 0; i < 4; i++) {
adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
for (i = 0; i < 7; i++) {
adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 2; i++) {
unsigned *c, (*c2)[2], sum;
sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
for (j = 0; j < 10; j++)
adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
for (j = 0; j < 2; j++) {
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
for (i = 0; i < 4; i++) {
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
for (i = 0; i < 10; i++) {
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
for (i = 0; i < 2; i++) {
for (i = 0; i < 8; i++) {
int res, tile_row, tile_col, i, ref, row, col;
ptrdiff_t yoff, uvoff, ls_y, ls_uv;
} else if (res == 0) {
for (i = 0; i < 8; i++) {
"Failed to allocate block buffers\n");
for (i = 0; i < 4; i++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++)
if (tile_size > size) {
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
col < s->tiling.tile_col_end;
col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
if (row + 8 < s->rows) {
f->data[0] + yoff + 63 * ls_y,
f->data[1] + uvoff + 31 * ls_uv,
f->data[2] + uvoff + 31 * ls_uv,
lflvl_ptr = s->lflvl;
for (col = 0; col < s->cols;
     col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
} while (s->pass++ == 1);
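/* The do/while above runs the tile decode twice (s->pass) in some threading
 * configurations: pass 1 parses entropy data and stores per-block decisions,
 * pass 2 reconstructs via decode_sb_mem(); probability adaptation can then
 * presumably complete before dependent frames start. */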
for (i = 0; i < 8; i++) {
for (i = 0; i < 2; i++)
for (i = 0; i < 8; i++)
for (i = 0; i < 2; i++) {
for (i = 0; i < 8; i++) {
(!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
for (i = 0; i < 2; i++) {
if (ssrc->frames[i].tf.f->data[0]) {
for (i = 0; i < 8; i++) {
if (ssrc->next_refs[i].f->data[0]) {
if (ssrc->segmentation.enabled) {