31 #define randomize_buffers(buf, size) \
34 for (j = 0; j < size; j+=4) \
35 AV_WN32(buf + j, rnd()); \
39 const int16_t **
src, uint8_t *dest,
int dstW,
44 for (
i = 0;
i < dstW;
i++) {
47 for (j = 0; j < filterSize; j++)
56 for (
size_t i = 0;
i < n;
i++) {
66 for (;
i <
len;
i++) {
70 printf(
"0x%02x ", (uint32_t) p[
i]);
82 for (
size_t i = 0;
i <
len;
i++) {
84 size_t offset_of_mismatch =
i;
93 return offset_of_mismatch;
105 const int input_sizes[] = {8, 24, 128, 144, 256, 512};
106 const int INPUT_SIZES =
sizeof(input_sizes)/
sizeof(input_sizes[0]);
107 #define LARGEST_INPUT_SIZE 512
109 const int offsets[] = {0, 3, 8, 11, 16, 19};
111 const char *accurate_str = (accurate) ?
"accurate" :
"approximate";
114 const int16_t *
src, uint8_t *dest,
132 dstW = input_sizes[isi];
133 for (osi = 0; osi < OFFSET_SIZES; osi++) {
145 printf(
"failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
146 (
int) src_pixels[fail_offset],
147 (
int)
dither[(fail_offset + fail_offset) & 7],
148 (
int) dst0[fail_offset],
149 (
int) dst1[fail_offset]);
162 int fsi, osi, isi,
i, j;
164 #define LARGEST_FILTER 16
166 const int filter_sizes[] = {2, 4, 8, 16};
167 const int FILTER_SIZES =
sizeof(filter_sizes)/
sizeof(filter_sizes[0]);
168 #define LARGEST_INPUT_SIZE 512
169 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
170 const int INPUT_SIZES =
sizeof(input_sizes)/
sizeof(input_sizes[0]);
171 const char *accurate_str = (accurate) ?
"accurate" :
"approximate";
174 int filterSize,
const int16_t **
src, uint8_t *dest,
187 uint8_t d_val =
rnd();
198 dstW = input_sizes[isi];
199 for(osi = 0; osi < 64; osi += 16){
214 for (
i = 0;
i < filter_sizes[fsi]; ++
i)
215 filter_coeff[
i] = -((1 << 12) / (filter_sizes[fsi] - 1));
216 filter_coeff[
rnd() % filter_sizes[fsi]] = (1 << 13) - 1;
219 vFilterData =
av_malloc((filter_sizes[fsi] + 2) *
sizeof(
union VFilterData));
220 memset(vFilterData, 0, (filter_sizes[fsi] + 2) *
sizeof(
union VFilterData));
221 for (
i = 0;
i < filter_sizes[fsi]; ++
i) {
223 vFilterData[
i].src =
src[
i] - osi;
224 for(j = 0; j < 4; ++j)
225 vFilterData[
i].
coeff[j + 4] = filter_coeff[
i];
227 if (
check_func(
ctx->yuv2planeX,
"yuv2yuvX_%d_%d_%d_%s", filter_sizes[fsi], osi, dstW, accurate_str)){
229 const int16_t *
filter =
ctx->use_mmx_vfilter ? (
const int16_t*)vFilterData : &filter_coeff[0];
240 printf(
"failed: yuv2yuvX_%d_%d_%d_%s\n", filter_sizes[fsi], osi, dstW, accurate_str);
244 bench_new((
const int16_t*)vFilterData, filter_sizes[fsi],
src, dst1, dstW - osi,
dither, osi);
257 #define SRC_PIXELS 512
261 #define MAX_FILTER_WIDTH 40
262 #define FILTER_SIZES 6
263 static const int filter_sizes[
FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
265 #define HSCALE_PAIRS 2
271 #define LARGEST_INPUT_SIZE 512
272 #define INPUT_SIZES 6
273 static const int input_sizes[
INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
275 int i, j, fsi, hpi,
width, dstWi;
292 const uint8_t *
src,
const int16_t *
filter,
293 const int32_t *filterPos,
int filterSize);
304 width = filter_sizes[fsi];
306 ctx->srcBpc = hscale_pairs[hpi][0];
307 ctx->dstBpc = hscale_pairs[hpi][1];
327 for (j = 0; j <
width; j++) {
340 ctx->dstW =
ctx->chrDstW = input_sizes[dstWi];
346 memset(dst0, 0,
SRC_PIXELS *
sizeof(dst0[0]));
347 memset(dst1, 0,
SRC_PIXELS *
sizeof(dst1[0]));
351 if (memcmp(dst0, dst1,
ctx->dstW *
sizeof(dst0[0])))