#define PIXEL_STRIDE 16

#define randomize_buffers(src, dst, stride, coef)             \
    do {                                                      \
        int x, y;                                             \
        for (y = 0; y < 4; y++) {                             \
            AV_WN32A((src) + y * (stride), rnd());            \
            AV_WN32A((dst) + y * (stride), rnd());            \
            for (x = 0; x < 4; x++)                           \
                (coef)[y * 4 + x] = (src)[y * (stride) + x] - \
                                    (dst)[y * (stride) + x];  \
        }                                                     \
    } while (0)

static void dct4x4(int16_t *coef)
{
    int i;
    for (i = 0; i < 4; i++) {
        const int a1 = (coef[i*4 + 0] + coef[i*4 + 3]) * 8;
        const int b1 = (coef[i*4 + 1] + coef[i*4 + 2]) * 8;
        const int c1 = (coef[i*4 + 1] - coef[i*4 + 2]) * 8;
        const int d1 = (coef[i*4 + 0] - coef[i*4 + 3]) * 8;
        coef[i*4 + 0] =  a1 + b1;
        coef[i*4 + 1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
        coef[i*4 + 2] =  a1 - b1;
        coef[i*4 + 3] = (d1 * 2217 - c1 * 5352 +  7500) >> 12;
    }
    for (i = 0; i < 4; i++) {
        const int a1 = coef[i + 0*4] + coef[i + 3*4];
        const int b1 = coef[i + 1*4] + coef[i + 2*4];
        const int c1 = coef[i + 1*4] - coef[i + 2*4];
        const int d1 = coef[i + 0*4] - coef[i + 3*4];
        coef[i + 0*4] =  (a1 + b1 + 7) >> 4;
        coef[i + 1*4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + !!d1;
        coef[i + 2*4] =  (a1 - b1 + 7) >> 4;
        coef[i + 3*4] =  (d1 * 2217 - c1 * 5352 + 51000) >> 16;
    }
}
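/*
 * The transform above is the reference forward 4x4 DCT used to produce
 * realistic input for the IDCT tests; the constants (2217/5352 and the
 * rounding terms, including the "+ !!d1" bias on the second coefficient)
 * match the libvpx forward transform. The helper below plays the same
 * role for the luma DC block: a 4x4 Walsh-Hadamard pass over the 16
 * per-block DC values that feeds the vp8_luma_dc_wht tests.
 */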
static void wht4x4(int16_t *coef)
{
    int i;
    for (i = 0; i < 4; i++) {
        int a1 = coef[0 * 4 + i];
        int b1 = coef[1 * 4 + i];
        int c1 = coef[2 * 4 + i];
        int d1 = coef[3 * 4 + i];
        /* ... */
    }
    for (i = 0; i < 4; i++) {
        int a1 = coef[i * 4 + 0];
        int b1 = coef[i * 4 + 1];
        int c1 = coef[i * 4 + 2];
        int d1 = coef[i * 4 + 3];
        /* ... */
        coef[i * 4 + 0] = a1 * 2;
        coef[i * 4 + 1] = c1 * 2;
        coef[i * 4 + 2] = d1 * 2;
        coef[i * 4 + 3] = b1 * 2;
    }
}
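/*
 * For reference, the VP8DSPContext entry points these helper transforms
 * feed (signatures as declared in libavcodec/vp8dsp.h):
 *
 *   void (*vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 *   void (*vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 *   void (*vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16]);
 *   void (*vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16]);
 */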
    for (dc = 0; dc <= 1; dc++) {
        if (dc) {
            memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));
            subcoef0[0] = coef[0];
        } else {
            memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));
        }
        memcpy(dst0, dst, 4 * 4);
        memcpy(dst1, dst, 4 * 4);
        memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));
        call_ref(dst0, subcoef0, 4);
        call_new(dst1, subcoef1, 4);
        if (memcmp(dst0, dst1, 4 * 4) ||
            memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))
            fail();
    }
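/*
 * checkasm idiom used throughout this file: call_ref() runs the C
 * reference implementation on one copy of the buffers and call_new()
 * runs the SIMD candidate on an identical copy. Both the destination
 * and the coefficient buffers are compared, so an implementation that
 * clobbers its input block differently from the C code also fails.
 */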
    for (chroma = 0; chroma <= 1; chroma++) {
        if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
            ptrdiff_t stride = chroma ? 8 : 16;
            int w = chroma ? 2 : 4;
            for (i = 0; i < 4; i++) {
                int blockx = 4 * (i % w);
                int blocky = 4 * (i / w);
                randomize_buffers(src + stride * blocky + blockx,
                                  dst + stride * blocky + blockx, stride, coef[i]);
                dct4x4(coef[i]);
                memset(&coef[i][1], 0, 15 * sizeof(int16_t));
            }

            memcpy(dst0, dst, 4 * 4 * 4);
            memcpy(dst1, dst, 4 * 4 * 4);
            memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));
            memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));
            call_ref(dst0, subcoef0, stride);
            call_new(dst1, subcoef1, stride);
            if (memcmp(dst0, dst1, 4 * 4 * 4) ||
                memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))
                fail();
        }
    }
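/*
 * vp8_idct_dc_add4y / vp8_idct_dc_add4uv add the inverse transform of
 * four blocks per call: a 16x4 strip for luma (w = 4, stride 16) or a
 * 2x2 group for chroma (w = 2, stride 8). Each block is reduced to its
 * DC coefficient above, and all four destinations are compared at once.
 */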
    int16_t block[4][4][16];
    /* ... */

    for (blocky = 0; blocky < 4; blocky++) {
        for (blockx = 0; blockx < 4; blockx++) {
            /* ... */
            dct4x4(block[blocky][blockx]);
            dc[blocky * 4 + blockx] = block[blocky][blockx][0];
            block[blocky][blockx][0] = rnd();
        }
    }
    wht4x4(dc);
    for (dc_only = 0; dc_only <= 1; dc_only++) {
        if (dc_only) {
            memset(dc0, 0, 16 * sizeof(int16_t));
            dc0[0] = dc[0];
        } else {
            memcpy(dc0, dc, 16 * sizeof(int16_t));
        }
        memcpy(dc1, dc0, 16 * sizeof(int16_t));
        memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));
        memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));
        call_ref(block0, dc0);
        call_new(block1, dc1);
        if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||
            memcmp(dc0, dc1, 16 * sizeof(int16_t)))
            fail();
    }
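/*
 * vp8_luma_dc_wht writes the inverse-WHT of the 16 DC values back into
 * element 0 of each of the 16 4x4 sub-blocks, which is why block[] and
 * dc[] are both compared above.
 *
 * The motion compensation tests below read from a padded source buffer:
 * "src" points 2 rows and 3 bytes into "buf", giving the 6-tap subpel
 * filters the context they need before the block, and the odd byte
 * offset keeps the source pointer deliberately unaligned.
 */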
#define SRC_BUF_STRIDE 32
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)

#undef randomize_buffers
#define randomize_buffers()                     \
    do {                                        \
        int k;                                  \
        for (k = 0; k < SRC_BUF_SIZE; k += 4) { \
            AV_WN32A(buf + k, rnd());           \
        }                                       \
    } while (0)

    /* ... */
    for (type = 0; type < 2; type++) {
        for (k = 1; k < 8; k++) {
            int hsize  = k / 3;
            int size   = 16 >> hsize;
            int height = (size << 1) >> (k % 3);
            for (dy = 0; dy < 3; dy++) {
                for (dx = 0; dx < 3; dx++) {
                    if (dx || dy) {
                        if (type == 0) {
                            static const char *dx_names[] = { "", "h4", "h6" };
                            static const char *dy_names[] = { "", "v4", "v6" };
                            snprintf(str, sizeof(str), "epel%d_%s%s",
                                     size, dx_names[dx], dy_names[dy]);
                        } else {
                            snprintf(str, sizeof(str), "bilin%d_%s%s",
                                     size, dx ? "h" : "", dy ? "v" : "");
                        }
                    } else {
                        snprintf(str, sizeof(str), "pixels%d", size);
                    }
                    /* ... */
                    if (type == 0) {
                        mx = dx == 2 ? 2 + 2 * (rnd() % 3) :
                             dx == 1 ? 1 + 2 * (rnd() % 4) : 0;
                        my = dy == 2 ? 2 + 2 * (rnd() % 3) :
                             dy == 1 ? 1 + 2 * (rnd() % 4) : 0;
                    } else {
                        mx = dx ? 1 + (rnd() % 7) : 0;
                        my = dy ? 1 + (rnd() % 7) : 0;
                    }
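                    /*
                     * In VP8, odd fractional positions select the 4-tap
                     * variant of the subpel filter and even non-zero
                     * positions the 6-tap one, so the "h4"/"v4" cases
                     * draw odd mx/my phases and "h6"/"v6" even ones; the
                     * bilinear functions accept any phase in 1..7.
                     */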
                    for (i = -2; i <= 3; i++) {
                        int val = (i == -1 || i == 2) ? 0 : 0xff;
                        /* ... */
                    }
                    call_ref(dst0, size, src, SRC_BUF_STRIDE, height, mx, my);
                    call_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);
                    if (memcmp(dst0, dst1, size * height))
                        fail();
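                    /*
                     * The tables under test, put_vp8_epel_pixels_tab and
                     * put_vp8_bilinear_pixels_tab, are indexed as
                     * tab[4 - log2(width)][dy][dx], with [hsize][0][0]
                     * being the plain copy ("pixels") case; every entry
                     * has the vp8_mc_func signature
                     * (dst, dst_stride, src, src_stride, h, mx, my).
                     */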
#undef randomize_buffers

#define setpx(a, b, c) buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
#define setdx2(a, b, o, c, d, e) setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    uint32_t mask = 0xff;
    int off = dir ? lineoff : lineoff * str;
    int istride = dir ? 1 : str;
    int jstride = dir ? str : 1;
    int i;
    for (i = 0; i < 8; i += 2) {
        int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
        setpx(idx, 0, q0 = rnd() & mask);
        if ((i == 0 && force_hev >= 0) || force_hev > 0)
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        else
            setdx(idx, 1, q1 = q0, hev_thresh);
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if ((i == 2 && force_hev >= 0) || force_hev > 0)
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        else
            setdx(idx, -2, p1 = p0, hev_thresh);
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}
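/*
 * The generated edge pixels stay within the filter limits: |p0 - q0| is
 * kept within flim_E / 4 and the interior deltas within flim_I, so the
 * filters under test actually engage. force_hev controls how many of
 * the filtered positions exceed hev_thresh: -1 none, 0 only the first
 * one, 1 all of them, covering both the hev and the non-hev paths of
 * the normal loop filter.
 */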
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    int x, y;
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            buf[y * stride + x] = rnd() & 0xff;
}
#define randomize_buffers(buf, lineoff, str, force_hev) \
    randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I, hev_thresh, buf, force_hev)

static void check_loopfilter_16y(void)
{
    int dir, edge, force_hev;
    int flim_E = 20, flim_I = 10, hev_thresh = 7;

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0 = base0 + midoff_aligned;
        uint8_t *buf1 = base1 + midoff_aligned;
        for (edge = 0; edge < 2; edge++) {
            switch (dir << 1 | edge) {
            /* ... */
            }
            if (check_func(func, "vp8_loop_filter16y%s_%s",
                           edge ? "_inner" : "", dir ? "v" : "h")) {
                for (force_hev = -1; force_hev <= 1; force_hev++) {
                    /* ... */
                    memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
                    call_ref(buf0, 16, flim_E, flim_I, hev_thresh);
                    call_new(buf1, 16, flim_E, flim_I, hev_thresh);
                    if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
                        fail();
                }
                /* ... */
                bench_new(buf0, 16, flim_E, flim_I, hev_thresh);
            }
        }
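/*
 * dir selects the horizontal ("h") vs vertical ("v") edge variant and
 * edge the macroblock-edge vs inner ("_inner") filter. midoff places
 * the filtered edge a few pixels into the buffer, so comparing from
 * buf - midoff covers the pixels on both sides of the edge that the
 * filter is allowed to modify.
 */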
static void check_loopfilter_8uv(void)
{
    int dir, edge, force_hev;
    int flim_E = 20, flim_I = 10, hev_thresh = 7;

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0u = base0u + midoff_aligned;
        uint8_t *buf0v = base0v + midoff_aligned;
        uint8_t *buf1u = base1u + midoff_aligned;
        uint8_t *buf1v = base1v + midoff_aligned;
        for (edge = 0; edge < 2; edge++) {
            switch (dir << 1 | edge) {
            /* ... */
            }
            if (check_func(func, "vp8_loop_filter8uv%s_%s",
                           edge ? "_inner" : "", dir ? "v" : "h")) {
                for (force_hev = -1; force_hev <= 1; force_hev++) {
                    /* ... */
                    memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);
                    memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);
                    call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
                    call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);
                    if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||
                        memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))
                        fail();
                }
                /* ... */
                bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);
            }
        }
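/*
 * The chroma filters process the U and V planes in a single call, with
 * two destination pointers sharing one stride and one set of limits:
 *
 *   void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV,
 *                                ptrdiff_t stride, int flim_E,
 *                                int flim_I, int hev_thresh);
 *
 * which is why both planes are copied and compared above.
 */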
static void check_loopfilter_simple(void)
{
    int dir;
    int flim_E = 20, flim_I = 30, hev_thresh = 0;

    for (dir = 0; dir < 2; dir++) {
        int midoff = dir ? 4 * 16 : 4;
        int midoff_aligned = dir ? 4 * 16 : 16;
        uint8_t *buf0 = base0 + midoff_aligned;
        uint8_t *buf1 = base1 + midoff_aligned;
        /* ... */
            memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);
            call_ref(buf0, 16, flim_E);
            call_new(buf1, 16, flim_E);
            if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))
                fail();
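/*
 * Sketch of how these checks are expected to be chained from the test
 * entry point; the check_* helper names and the grouping of the
 * report() labels are assumptions based on the checks above.
 */
void checkasm_check_vp8dsp(void)
{
    check_idct();
    check_idct_dc4();
    check_luma_dc_wht();
    report("idct");
    check_mc();
    report("mc");
    check_loopfilter_16y();
    check_loopfilter_8uv();
    check_loopfilter_simple();
    report("loopfilter");
}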