#define VP9_SYNCCODE 0x498342

unsigned coef[4][2][2][6][6][3];
unsigned eob[4][2][2][6][6][2];

    { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
    { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
    { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
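/* The two pairs of rows above appear to be the halves of
 * bwh_tab[2][N_BS_SIZES][2]: block width/height per VP9 block size, first
 * in 4x4-block units (64x64 -> { 16, 16 }) and then in 8x8-block units
 * clamped to a minimum of 1, so all sub-8x8 sizes read { 1, 1 }. */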
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)

if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {

#if CONFIG_VP9_DXVA2_HWACCEL
#if CONFIG_VP9_D3D11VA_HWACCEL
#if CONFIG_VP9_VAAPI_HWACCEL

*fmtp++ = s->pix_fmt;

s->last_fmt = s->pix_fmt;

s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
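/* assign() carves the per-column scratch arrays out of one contiguous
 * allocation: p is a running byte pointer and each array receives
 * s->sb_cols * n elements. A hypothetical use (array member and element
 * count assumed for illustration):
 *     assign(s->intra_pred_data[0], uint8_t *, 64);
 * casts the current position and advances p past that array. */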
int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);

    16 * 16 + 2 * chroma_eobs) * sbs);

    16 * 16 + 2 * chroma_eobs);

return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
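/* Inverse recentering of a probability update around the previous value m
 * (inv_recenter_nonneg in the VP9 spec): small v land close to m.
 * With m = 10: v = 1 -> 9, v = 2 -> 11, v = 3 -> 8, v = 4 -> 12, and any
 * v > 2 * m is taken literally. */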
static const int inv_map_table[255] = {
      7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
    189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
     10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
     25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
     40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
     55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
     70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
     86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
    116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
    131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
    146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
    222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
    237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
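/* inv_map_table appears to undo the encoder-side remapping of
 * probability-update deltas: the decoded 8-bit index is looked up here
 * before being recentered around the old probability by the
 * inverse-recentering expression shown earlier. */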
s->bpp = 8 + bits * 2;

s->pix_fmt = pix_fmt_rgb[bits];
static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
s->pix_fmt = pix_fmt_for_ss[bits][1][1];

int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;

for (i = 0; i < 4; i++)
for (i = 0; i < 2; i++)
for (i = 0; i < 7; i++)
for (i = 0; i < 3; i++)
for (i = 0; i < 8; i++) {

int qyac, qydc, quvac, quvdc, lflvl, sh;

qyac = av_clip_uintp2(qyac, 8);

for (j = 1; j < 4; j++) {

for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
max = FFMAX(0, max - 1);
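/* The loop above finds the largest log2 tile-column count such that every
 * tile column still spans at least four 64x64 superblocks; presumably this
 * is the spec's max_log2_tile_cols bound, with the FFMAX line flooring the
 * result at zero. */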
for (i = 0; i < 3; i++) {

    "Ref pixfmt (%s) did not match current frame (%s)",

} else if (refw == w && refh == h) {

    if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
        "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",

    s->mvscale[i][0] = (refw << 14) / w;
    s->mvscale[i][1] = (refh << 14) / h;
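/* 14-bit fixed-point motion-vector scaling for scaled reference frames:
 * (refw << 14) / w is exactly 16384 when the reference matches the current
 * frame size. The preceding range check enforces the bitstream limits:
 * a reference may be at most 2x larger and at most 16x smaller per axis. */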
if (size2 > size - (data2 - data)) {

for (i = 0; i < 2; i++)

for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)

for (i = 0; i < 2; i++)
    for (j = 0; j < 3; j++)

for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
                    if (m >= 3 && l == 0)
                    for (n = 0; n < 3; n++) {

for (j = 0; j < 2; j++)
    for (k = 0; k < 2; k++)
        for (l = 0; l < 6; l++)
            for (m = 0; m < 6; m++) {

for (i = 0; i < 3; i++)

for (i = 0; i < 7; i++)
    for (j = 0; j < 3; j++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 2; j++)

for (i = 0; i < 4; i++)

for (i = 0; i < 5; i++)

for (i = 0; i < 5; i++) {

for (i = 0; i < 5; i++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 9; j++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
        for (k = 0; k < 3; k++)

for (i = 0; i < 3; i++)

for (i = 0; i < 2; i++) {
    for (j = 0; j < 10; j++)

    for (j = 0; j < 10; j++)

for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 3; k++)

    for (j = 0; j < 3; j++)

for (i = 0; i < 2; i++) {

return (data2 - data) + size2;
VP56mv *pmv, int ref, int z, int idx, int sb)

static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
    [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
                  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
    [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
    [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
                  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
    [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
                  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
                  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
    [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
                  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
    [BS_16x8]  = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
                  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
    [BS_8x16]  = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
                  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
    [BS_8x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_8x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_4x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    [BS_4x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
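/* Each row above lists eight (col, row) offsets in 8x8-block units that
 * the MV prediction scan visits around the current block for that block
 * size; closer neighbours come first so the earliest matches fill the
 * best/near candidate slots. */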
int row = s->row, col = s->col, row7 = s->row7;
const int8_t (*p)[2] = mv_ref_blk_off[b->bs];

#define INVALID_MV 0x80008000U

#define RETURN_DIRECT_MV(mv) \
        uint32_t m = AV_RN32A(&mv); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \

if (sb == 2 || sb == 1) {
} else if (sb == 3) {

#define RETURN_MV(mv) \
            av_assert2(idx == 1); \
            av_assert2(mem != INVALID_MV); \
            if (mem_sub8x8 == INVALID_MV) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
                mem_sub8x8 = AV_RN32A(&mv); \
            } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \

        uint32_t m = AV_RN32A(&mv); \
            clamp_mv(pmv, &mv, s); \
        } else if (mem == INVALID_MV) { \
        } else if (m != mem) { \
            clamp_mv(pmv, &mv, s); \

if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {

if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {

for (; i < 8; i++) {
    int c = p[i][0] + col, r = p[i][1] + row;

    if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {

        if (mv->ref[0] == ref) {
        } else if (mv->ref[1] == ref) {

if (mv->ref[0] == ref) {
} else if (mv->ref[1] == ref) {

#define RETURN_SCALE_MV(mv, scale) \
        VP56mv mv_temp = { -mv.x, -mv.y }; \
        RETURN_MV(mv_temp); \

for (i = 0; i < 8; i++) {
    int c = p[i][0] + col, r = p[i][1] + row;

    if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {

        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

if (mv->ref[0] != ref && mv->ref[0] >= 0) {
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

#undef RETURN_SCALE_MV
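/* RETURN_SCALE_MV negates the borrowed vector ({ -mv.x, -mv.y }) before
 * returning it, which appears to be VP9's sign-bias handling: an MV taken
 * from a block that references a temporally opposite reference frame must
 * be mirrored before reuse. */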
for (n = 0, m = 0; m < c; m++) {

n = (n << 3) | (bit << 1);

return sign ? -(n + 1) : (n + 1);
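/* Tail of motion-vector component decoding: the magnitude n is assembled
 * from class, integer and fractional bits, then offset by 1 before the
 * sign is applied, since a zero component is signalled separately via the
 * MV joint. */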
mode == NEWMV ? -1 : sb);

if ((mode == NEWMV || sb == -1) &&

if (mode == NEWMV) {

mode == NEWMV ? -1 : sb);
if ((mode == NEWMV || sb == -1) &&

if (mode == NEWMV) {

int v16 = v * 0x0101;

uint32_t v32 = v * 0x01010101;

uint64_t v64 = v * 0x0101010101010101ULL;

uint32_t v32 = v * 0x01010101;

0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf

0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf

int row = s->row, col = s->col, row7 = s->row7;
enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];

int vref, filter_id;

for (y = 0; y < h4; y++) {
    int idx_base = (y + row) * 8 * s->sb_cols + col;
    for (x = 0; x < w4; x++)
        pred = FFMIN(pred, refsegmap[idx_base + x]);

if (have_a && have_l) {
} else if (have_l) {

l[0] = a[1] = b->mode[1];

l[0] = a[1] = b->mode[1] = b->mode[0];

l[1] = a[1] = b->mode[3];

l[1] = a[1] = b->mode[3] = b->mode[2];

l[1] = a[1] = b->mode[3] = b->mode[1];

} else if (b->intra) {

static const uint8_t size_group[10] = {
    3, 3, 3, 3, 2, 2, 2, 1, 1, 1

int sz = size_group[b->bs];

static const uint8_t inter_mode_ctx_lut[14][14] = {
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
    { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
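/* inter_mode_ctx_lut is indexed by the above and left neighbour modes.
 * Indices 0-9 are the intra modes and 10-13 the inter modes
 * (NEARESTMV..NEWMV in FFmpeg's mode enum), so any intra neighbour
 * collapses to context 5 or 6 and only inter/inter pairs reach the finer
 * contexts in the lower-right 4x4 corner. */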
} else if (have_l) {

c = (refa == refl) ? 3 : 1;

c = (refl == refa) ? 4 : 2;

} else if (have_l) {

} else if (have_l) {

b->ref[0] = 1 + bit;

static const uint8_t off[10] = {
    3, 0, 0, 1, 0, 0, 0, 0, 0, 0

#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                                   break; \
    case 2:  AV_WN16A(&var, val * 0x0101);                break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);            break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \

#define SPLAT_CTX(var, val, n) \
    case 1:  var = val;                        break; \
    case 2:  AV_WN16A(&var, val * 0x0101);     break; \
    case 4:  AV_WN32A(&var, val * 0x01010101); break; \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \

        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
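/* Both SPLAT_CTX variants broadcast one byte across n context bytes with a
 * single aligned store: multiplying by 0x0101... replicates the byte into
 * every lane (e.g. 3 * 0x01010101 == 0x03030303), which beats memset for
 * these tiny fixed sizes. The first definition is presumably the
 * HAVE_FAST_64BIT flavour, the second the 32-bit fallback. */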
#define SET_CTXS(dir, off, n) \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \

case 1: SET_CTXS(above, col, 1); break;
case 2: SET_CTXS(above, col, 2); break;
case 4: SET_CTXS(above, col, 4); break;
case 8: SET_CTXS(above, col, 8); break;

case 1: SET_CTXS(left, row7, 1); break;
case 2: SET_CTXS(left, row7, 2); break;
case 4: SET_CTXS(left, row7, 4); break;
case 8: SET_CTXS(left, row7, 8); break;

for (n = 0; n < w4 * 2; n++) {

for (n = 0; n < h4 * 2; n++) {

for (y = 0; y < h4; y++) {
    int x, o = (row + y) * s->sb_cols * 8 + col;

    for (x = 0; x < w4; x++) {

} else if (b->comp) {
    for (x = 0; x < w4; x++) {
        mv[x].ref[0] = b->ref[0];
        mv[x].ref[1] = b->ref[1];

for (x = 0; x < w4; x++) {
    mv[x].ref[0] = b->ref[0];
int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
unsigned (*eob)[6][2], uint8_t (*p)[6][11],
int nnz, const int16_t *scan, const int16_t (*nb)[2],
const int16_t *band_counts, const int16_t *qmul)

int i = 0, band = 0, band_left = band_counts[band];

cnt[band][nnz][0]++;

band_left = band_counts[++band];

nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;

if (++i == n_coeffs)

cnt[band][nnz][1]++;

cnt[band][nnz][2]++;

cache[rc] = val = 2;

if (!is8bitsperpixel) {

#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        AV_WN32A(&c[i * 2], v); \

band_left = band_counts[++band];

nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;

} while (++i < n_coeffs);
unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);

unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);

unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);

unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
uint8_t (*p)[6][11], int nnz, const int16_t *scan,
const int16_t (*nb)[2], const int16_t *band_counts,
const int16_t *qmul)
    nnz, scan, nb, band_counts, qmul);
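/* The four fragments above are the decode_coeffs_b wrappers: 8 vs 16 bits
 * per pixel crossed with regular vs 32x32 transforms. Each simply forwards
 * to a shared av_always_inline body with compile-time-constant flags so
 * the per-coefficient branches fold away; this is FFmpeg's usual way of
 * specialising one template function. */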
int row = s->row, col = s->col;

int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);
int n, pl, x, y, res;

const int16_t *const *yscans = vp9_scans[tx];

static const int16_t band_counts[4][8] = {
    { 1, 2, 3, 4,  3,   16 - 13 },
    { 1, 2, 3, 4, 11,   64 - 21 },
    { 1, 2, 3, 4, 11,  256 - 21 },
    { 1, 2, 3, 4, 11, 1024 - 21 },
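/* band_counts partitions each transform's scan order into the six
 * coefficient bands VP9 uses for probability lookup. Rows correspond to
 * the 4x4..32x32 transforms and the entries sum to the coefficient count,
 * written to make that visible (1 + 2 + 3 + 4 + 3 + (16 - 13) = 16). */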
const int16_t *y_band_counts  = band_counts[b->tx];
const int16_t *uv_band_counts = band_counts[b->uvtx];
int bytesperpixel = is8bitsperpixel ? 1 : 2;
int total_coeff = 0;

#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
                   c, e, p, a[x] + l[y], yscans[txtp], \
                   ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
            AV_WN16A(&s->eob[n], res); \

#define SPLAT(la, end, step, cond) \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        if (HAVE_FAST_64BIT) { \
            for (n = 0; n < end; n += step) \
                AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            for (n = 0; n < end; n += step) { \
                uint32_t v32 = la[n] * 0x01010101; \
                AV_WN32A(&la[n],     v32); \
                AV_WN32A(&la[n + 4], v32); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \

#define SPLAT_CTX(step) \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \

#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
                   16 * step * step, c, e, p, a[x] + l[y], \
                   uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \
            AV_WN16A(&s->uveob[pl][n], res); \
            s->uveob[pl][n] = res; \
for (pl = 0; pl < 2; pl++) {

uint8_t *dst_edge, ptrdiff_t stride_edge,
uint8_t *dst_inner, ptrdiff_t stride_inner,
uint8_t *l, int col, int x, int w,
int p, int ss_h, int ss_v, int bytesperpixel)

int have_top = row > 0 || y > 0;

int have_right = x < w - 1;
static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
static const struct {

    [DC_PRED]         = { .needs_top = 1, .needs_left = 1 },

    [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
    [HOR_DOWN_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },

    [HOR_UP_PRED]     = { .needs_left = 1, .invert_left = 1 },
    [TM_VP8_PRED]     = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },

mode = mode_conv[mode][have_left][have_top];
if (edges[mode].needs_top) {

    int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
    int n_px_need_tr = 0;

    if (tx == TX_4X4 && edges[mode].needs_topright && have_right)

    top = !(row & 7) && !y ?
          y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];

    topleft = !(row & 7) && !y ?
              y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                                 &dst_inner[-stride_inner];

    (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
    (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
    n_px_need + n_px_need_tr <= n_px_have) {

    if (n_px_need <= n_px_have) {
        memcpy(*a, top, n_px_need * bytesperpixel);

#define memset_bpp(c, i1, v, i2, num) do { \
    if (bytesperpixel == 1) { \
        memset(&(c)[(i1)], (v)[(i2)], (num)); \
        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[((i1) + n) * 2], val); \

        memcpy(*a, top, n_px_have * bytesperpixel);
        memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);

#define memset_val(c, val, num) do { \
    if (bytesperpixel == 1) { \
        memset((c), (val), (num)); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[n * 2], (val)); \

    memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);

    if (edges[mode].needs_topleft) {
        if (have_left && have_top) {

#define assign_bpp(c, i1, v, i2) do { \
    if (bytesperpixel == 1) { \
        (c)[(i1)] = (v)[(i2)]; \
        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \

#define assign_val(c, i, v) do { \
    if (bytesperpixel == 1) { \
        AV_WN16A(&(c)[(i) * 2], (v)); \

        assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));

    if (tx == TX_4X4 && edges[mode].needs_topright) {
        if (have_top && have_right &&
            n_px_need + n_px_need_tr <= n_px_have) {
            memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);

if (edges[mode].needs_left) {

    int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
    uint8_t *dst = x == 0 ? dst_edge : dst_inner;
    ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

    if (edges[mode].invert_left) {
        if (n_px_need <= n_px_have) {
            for (i = 0; i < n_px_need; i++)
            for (i = 0; i < n_px_have; i++)
            memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);

        if (n_px_need <= n_px_have) {
            for (i = 0; i < n_px_need; i++)
                assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
            for (i = 0; i < n_px_have; i++)
                assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
            memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);

    memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
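/* Intra edge setup: the top/left sample arrays are filled from decoded
 * neighbours where available, otherwise from fixed values around the mid
 * level 128 << (bpp - 8), with the -1/+1 offsets for missing top, top-left
 * and left edges matching libvpx's 127/129 conventions at 8-bit depth. */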
ptrdiff_t uv_off, int bytesperpixel)

int row = s->row, col = s->col;
int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);

int uvstep1d = 1 << b->uvtx, p;

for (n = 0, y = 0; y < end_y; y += step1d) {
    uint8_t *ptr = dst, *ptr_r = dst_r;
    for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
                           ptr_r += 4 * step1d * bytesperpixel, n += step) {

        col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);

        s->block + 16 * n * bytesperpixel, eob);

step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
    dst = s->dst[1 + p];

    for (n = 0, y = 0; y < end_y; y += uvstep1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                               ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {

            ptr, s->uv_stride, l, col, x, w4, row, y,

            s->uvblock[p] + 16 * n * bytesperpixel, eob);
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)

int mx = mv->x, my = mv->y, th;

ref += y * ref_stride + x * bytesperpixel;

th = (y + bh + 4 * !!my + 7) >> 6;

if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {

    ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,

    bw + !!mx * 7, bh + !!my * 7,
    x - !!mx * 3, y - !!my * 3, w, h);

mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
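/* Unscaled luma MC with edge emulation: the 8-tap subpel filters read 3
 * pixels before and 4 after the block (the !!mx / !!my terms), so when
 * that footprint would leave the reference frame the source is first
 * copied into edge_emu_buffer with replicated borders and filtering runs
 * on the copy. */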
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)

int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;

ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x * bytesperpixel;

th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);

if (x < !!mx * 3 || y < !!my * 3 ||
    x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {

    ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,

    bw + !!mx * 7, bh + !!my * 7,
    x - !!mx * 3, y - !!my * 3, w, h);
    ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
    mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);

    ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,

    bw + !!mx * 7, bh + !!my * 7,
    x - !!mx * 3, y - !!my * 3, w, h);
    ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
    mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);

    mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
    mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);

#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h, bytesperpixel)

#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#undef mc_chroma_dir
#undef BYTES_PER_PIXEL
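/* The FN()/BYTES_PER_PIXEL define-undef pairs are FFmpeg's poor-man's
 * template: the elided lines between them presumably #include the shared
 * vp9_mc_template.c body once per bit depth, generating the _8bpp and
 * _16bpp inter-prediction entry points from one source file. */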
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)

y, x, in_mv, bw, bh, w, h, bytesperpixel);

#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)

int refbw_m1, refbh_m1;

mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

ref += y * ref_stride + x * bytesperpixel;

refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

th = (y + refbh_m1 + 4 + 7) >> 6;

if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {

    ref - 3 * ref_stride - 3 * bytesperpixel,

    refbw_m1 + 8, refbh_m1 + 8,
    x - 3, y - 3, w, h);

smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)

ref_v, src_stride_v, ref_frame,
y, x, in_mv, bw, bh, w, h, bytesperpixel);

int refbw_m1, refbh_m1;

mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);

mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);

mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);

mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x * bytesperpixel;

refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);

if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {

    ref_u - 3 * src_stride_u - 3 * bytesperpixel,

    refbw_m1 + 8, refbh_m1 + 8,
    x - 3, y - 3, w, h);

    smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

    ref_v - 3 * src_stride_v - 3 * bytesperpixel,

    refbw_m1 + 8, refbh_m1 + 8,
    x - 3, y - 3, w, h);

    smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);

    smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
    smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);

#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])

#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#undef mc_chroma_dir
#undef BYTES_PER_PIXEL
int row = s->row, col = s->col;

if (bytesperpixel == 1) {
    inter_pred_scaled_8bpp(ctx);
    inter_pred_scaled_16bpp(ctx);

if (bytesperpixel == 1) {
    inter_pred_8bpp(ctx);
    inter_pred_16bpp(ctx);

int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
int end_x = FFMIN(2 * (s->cols - col), w4);
int end_y = FFMIN(2 * (s->rows - row), h4);

int uvstep1d = 1 << b->uvtx, p;

for (n = 0, y = 0; y < end_y; y += step1d) {
    for (x = 0; x < end_x; x += step1d,
         ptr += 4 * step1d * bytesperpixel, n += step) {

        s->block + 16 * n * bytesperpixel, eob);

step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
    dst = s->dst[p + 1];
    for (n = 0, y = 0; y < end_y; y += uvstep1d) {
        for (x = 0; x < end_x; x += uvstep1d,
             ptr += 4 * uvstep1d * bytesperpixel, n += step) {

            s->uvblock[p] + 16 * n * bytesperpixel, eob);
int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end,

static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

if (tx == TX_4X4 && (ss_v | ss_h)) {

if (tx == TX_4X4 && !skip_inter) {
    int t = 1 << col_and_7, m_col = (t << w) - t, y;

    int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

    for (y = row_and_7; y < h + row_and_7; y++) {
        int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

        mask[0][y][1] |= m_row_8;
        mask[0][y][2] |= m_row_4;

        if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
            mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            mask[1][y][col_mask_id] |= m_col;

        mask[0][y][3] |= m_col;

        if (ss_h && (col_end & 1))
            mask[1][y][3] |= (t << (w - 1)) - t;
            mask[1][y][3] |= m_col;

    int y, t = 1 << col_and_7, m_col = (t << w) - t;

    int mask_id = (tx == TX_8X8);
    static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
    int l2 = tx + ss_h - 1, step1d;
    int m_row = m_col & masks[l2];

    if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
        int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
        int m_row_8 = m_row - m_row_16;

        for (y = row_and_7; y < h + row_and_7; y++) {
            mask[0][y][0] |= m_row_16;
            mask[0][y][1] |= m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++)
            mask[0][y][mask_id] |= m_row;

    if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
        for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
            mask[1][y][0] |= m_col;
        if (y - row_and_7 == h - 1)
            mask[1][y][1] |= m_col;

        for (y = row_and_7; y < h + row_and_7; y += step1d)
            mask[1][y][mask_id] |= m_col;

} else if (tx != TX_4X4) {

    mask_id = (tx == TX_8X8) || (h == ss_v);
    mask[1][row_and_7][mask_id] |= m_col;
    mask_id = (tx == TX_8X8) || (w == ss_h);
    for (y = row_and_7; y < h + row_and_7; y++)
        mask[0][y][mask_id] |= t;

    int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

    for (y = row_and_7; y < h + row_and_7; y++) {

mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
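/* mask_edges() appears to accumulate the per-superblock loop-filter
 * bitmaps: mask[0] for vertical (column) edges and mask[1] for horizontal
 * (row) edges, each entry a bitmask over the superblock's eight 8x8
 * columns, with the last index selecting the filter width class (16-, 8-,
 * 4-pixel and inner 4x4 edges). */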
struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,

s->min_mv.x = -(128 + col * 64);
s->min_mv.y = -(128 + row * 64);

b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                   (s->ss_v && h4 * 2 == (1 << b->tx)));

if (bytesperpixel == 1) {

#define SPLAT_ZERO_CTX(v, n) \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \

#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \

s->block += w4 * h4 * 64 * bytesperpixel;

s->eob += 4 * w4 * h4;

emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
         (row + h4) > s->rows;
emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
         (row + h4) > s->rows;

s->dst[0] = f->data[0] + yoff;

s->dst[1] = f->data[1] + uvoff;
s->dst[2] = f->data[2] + uvoff;

for (n = 0; o < w; n++) {

    s->tmp_y + o * bytesperpixel, 128, h, 0, 0);

for (n = s->ss_h; o < w; n++) {

    s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);

    s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);

mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);

b->uvtx, skip_inter);

limit >>= (sharp + 3) >> 2;
limit = FFMIN(limit, 9 - sharp);

limit = FFMAX(limit, 1);

s->block += w4 * h4 * 64 * bytesperpixel;

s->eob += 4 * w4 * h4;
ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)

ptrdiff_t hbs = 4 >> bl;

decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (col + hbs < s->cols) {
    if (row + hbs < s->rows) {

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 8 * uv_stride >> s->ss_v;
        decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        yoff  += hbs * 8 * bytesperpixel;
        uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
        decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);

        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                  yoff + 8 * hbs * bytesperpixel,
                  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 8 * uv_stride >> s->ss_v;
        decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        decode_sb(ctx, row + hbs, col + hbs, lflvl,
                  yoff + 8 * hbs * bytesperpixel,
                  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                  yoff + 8 * hbs * bytesperpixel,
                  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

} else if (row + hbs < s->rows) {

    decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    yoff  += hbs * 8 * y_stride;
    uvoff += hbs * 8 * uv_stride >> s->ss_v;
    decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);

    decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

    decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);

ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)

ptrdiff_t hbs = 4 >> bl;

decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (s->b->bl == bl) {
    decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);

    yoff  += hbs * 8 * y_stride;
    uvoff += hbs * 8 * uv_stride >> s->ss_v;
    decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);

    yoff  += hbs * 8 * bytesperpixel;
    uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
    decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);

if (col + hbs < s->cols) {
    if (row + hbs < s->rows) {
        decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 8 * uv_stride >> s->ss_v;
        decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

        yoff  += hbs * 8 * bytesperpixel;
        uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
        decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);

} else if (row + hbs < s->rows) {
    yoff  += hbs * 8 * y_stride;
    uvoff += hbs * 8 * uv_stride >> s->ss_v;
    decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
    uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
    unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
    unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
    unsigned hm = hm1 | hm2 | hm13 | hm23;

    for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {

        int L = *l, H = L >> 4;

        if (hmask1[0] & x) {
            if (hmask2[0] & x) {

        } else if (hm2 & x) {

        [0](ptr, ls, E, I, H);

        [0](ptr, ls, E, I, H);

        } else if (hm2 & x) {
            int L = l[8 << ss_v], H = L >> 4;

            [0](ptr + 8 * ls, ls, E, I, H);

        int L = *l, H = L >> 4;

        } else if (hm23 & x) {
            int L = l[8 << ss_v], H = L >> 4;

for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {

    unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

    for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {

        int L = *l, H = L >> 4;

        if (vmask[0] & (x << (1 + ss_h))) {

        } else if (vm & (x << (1 + ss_h))) {

        [!!(vmask[1] & (x << (1 + ss_h)))]
        [1](ptr, ls, E, I, H);

        [1](ptr, ls, E, I, H);

        } else if (vm & (x << (1 + ss_h))) {
            int L = l[1 + ss_h], H = L >> 4;

            [1](ptr + 8 * bytesperpixel, ls, E, I, H);

        int L = *l, H = L >> 4;

        if (vm3 & (x << (1 + ss_h))) {

        } else if (vm3 & (x << (1 + ss_h))) {
            int L = l[1 + ss_h], H = L >> 4;
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)

for (p = 0; p < 2; p++) {
    dst = f->data[1 + p] + uvoff;

int sb_start = ( idx      * n) >> log2_n;
int sb_end   = ((idx + 1) * n) >> log2_n;
*start = FFMIN(sb_start, n) << 3;
*end   = FFMIN(sb_end,   n) << 3;
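/* Tile boundary math: with 2^log2_n tiles over n superblocks, tile idx
 * covers superblocks (idx * n) >> log2_n up to ((idx + 1) * n) >> log2_n,
 * and << 3 converts superblock units into 8x8-block rows/columns. For
 * n = 10 and four tiles the start positions come out as 0, 2, 5, 7. */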
int max_count, int update_factor)

unsigned ct = ct0 + ct1, p2, p1;

p2 = ((ct0 << 8) + (ct >> 1)) / ct;
p2 = av_clip(p2, 1, 255);
ct = FFMIN(ct, max_count);
update_factor = FASTDIV(update_factor * ct, max_count);

*p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
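/* Backward probability adaptation: p2 is the empirical probability from
 * this frame's counts (rounded, clipped to 1..255) and the stored
 * probability p1 moves toward it by update_factor / 256, downscaled when
 * fewer than max_count events were observed. Example: p1 = 128, p2 = 192,
 * update_factor = 128 gives *p = 128 + ((64 * 128 + 128) >> 8) = 160. */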
for (i = 0; i < 4; i++)
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
                    if (l == 0 && m >= 3)

adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);

for (i = 0; i < 3; i++)

for (i = 0; i < 4; i++)

for (i = 0; i < 5; i++)

for (i = 0; i < 5; i++)

for (i = 0; i < 5; i++) {

    adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
    adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);

for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) {

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

for (i = 0; i < 2; i++) {

    adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);

for (i = 0; i < 4; i++) {

    adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);

for (i = 0; i < 7; i++) {

    adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
    adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);

adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

for (i = 0; i < 2; i++) {

    unsigned *c, (*c2)[2], sum;

    sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];

    adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);

    adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);

    adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);

    for (j = 0; j < 10; j++)
        adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

    for (j = 0; j < 2; j++) {

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

for (i = 0; i < 4; i++) {

    sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

for (i = 0; i < 10; i++) {

    sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

for (i = 0; i < 3; i++) {

for (i = 0; i < 8; i++) {

int res, tile_row, tile_col, i, ref, row, col;

ptrdiff_t yoff, uvoff, ls_y, ls_uv;

} else if (res == 0) {

for (i = 0; i < 8; i++) {

for (i = 0; i < 8; i++) {

"Failed to allocate block buffers\n");

for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++)