FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
30 
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written in
 * 4-byte steps through AV_WN32().
 *
 * `size` must be a multiple of 4: the last store always writes a whole word,
 * so any other size writes up to 3 bytes past `buf + size`.
 *
 * Both arguments are parenthesized so that arbitrary expressions can be
 * passed, and the loop counter has a macro-private name so it cannot capture
 * an identifier used in the caller's arguments.
 */
#define randomize_buffers(buf, size)                          \
    do {                                                      \
        int rb_pos_;                                          \
        for (rb_pos_ = 0; rb_pos_ < (size); rb_pos_ += 4)     \
            AV_WN32((buf) + rb_pos_, rnd());                  \
    } while (0)
37 
/*
 * Plain C reference for the 8-bit vertical filter; it corresponds to the
 * yuv2planeX_8_c function.
 *
 * filter:     filterSize coefficients, one per source line
 * src:        filterSize pointers to source lines of at least dstW samples
 * dest:       receives dstW output bytes
 * dither:     8-entry table, indexed with (x + offset) & 7
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        // Seed the accumulator with the dither value for this column.
        int acc = dither[(x + offset) & 7] << 12;

        for (int tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
53 
/*
 * Compare two byte buffers with a per-byte tolerance.
 *
 * Returns 1 as soon as a pair of bytes differs by more than `accuracy`,
 * and 0 when all `n` pairs are within that tolerance.
 */
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
{
    const uint8_t *const ref_end = ref + n;

    while (ref < ref_end) {
        const int delta = *ref++ - *test++;
        const int dist  = delta < 0 ? -delta : delta;

        if (dist > accuracy)
            return 1;
    }
    return 0;
}
62 
/*
 * Hex-dump `len` bytes from `p` to stdout, eight bytes per row.
 * Each row starts with the index of its first byte plus `offset`, so the
 * caller can print a window of a larger buffer with its real positions.
 */
static void print_data(uint8_t *p, size_t len, size_t offset)
{
    size_t pos;

    for (pos = 0; pos < len; pos++) {
        const size_t column = pos % 8;

        if (column == 0)
            printf("0x%04zx: ", pos + offset);

        printf("0x%02x ", (uint32_t) p[pos]);

        if (column == 7)
            printf("\n");
    }

    // Terminate a final, partially filled row.
    if (pos % 8 != 0)
        printf("\n");
}
79 
/*
 * Find the first position at which `a` and `b` differ and hex-dump the
 * surrounding bytes of both buffers to stdout.
 *
 * The dump starts at the 8-byte-aligned row that contains index
 * (mismatch - 8), or at row 0 for a mismatch in the first eight bytes, and
 * covers at most 32 bytes. It is clamped to `len` so that a mismatch near the
 * end of the buffers never causes a read past them.
 *
 * Returns the index of the first mismatch, or `len` if the buffers are equal.
 */
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (a[i] != b[i]) {
            const size_t offset    = (i >= 8 ? i - 8 : i) & ~(size_t)7;
            const size_t remaining = len - offset;
            const size_t dump_len  = remaining < 32 ? remaining : 32;

            printf("test a:\n");
            print_data(&a[offset], dump_len, offset);
            printf("\ntest b:\n");
            print_data(&b[offset], dump_len, offset);
            printf("\n");
            return i;
        }
    }
    return len;
}
98 
99 static void check_yuv2yuv1(int accurate)
100 {
101  struct SwsContext *ctx;
102  int osi, isi;
103  int dstW, offset;
104  size_t fail_offset;
105  const int input_sizes[] = {8, 24, 128, 144, 256, 512};
106  const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
107  #define LARGEST_INPUT_SIZE 512
108 
109  const int offsets[] = {0, 3, 8, 11, 16, 19};
110  const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);
111  const char *accurate_str = (accurate) ? "accurate" : "approximate";
112 
114  const int16_t *src, uint8_t *dest,
115  int dstW, const uint8_t *dither, int offset);
116 
117  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);
118  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
119  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
120  LOCAL_ALIGNED_8(uint8_t, dither, [8]);
121 
122  randomize_buffers((uint8_t*)dither, 8);
123  randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
125  if (accurate)
127  if (sws_init_context(ctx, NULL, NULL) < 0)
128  fail();
129 
131  for (isi = 0; isi < INPUT_SIZES; ++isi) {
132  dstW = input_sizes[isi];
133  for (osi = 0; osi < OFFSET_SIZES; osi++) {
134  offset = offsets[osi];
135  if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){
136  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
137  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
138 
139  call_ref(src_pixels, dst0, dstW, dither, offset);
140  call_new(src_pixels, dst1, dstW, dither, offset);
141  if (cmp_off_by_n(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {
142  fail();
143  printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str);
144  fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
145  printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
146  (int) src_pixels[fail_offset],
147  (int) dither[(fail_offset + fail_offset) & 7],
148  (int) dst0[fail_offset],
149  (int) dst1[fail_offset]);
150  }
151  if(dstW == LARGEST_INPUT_SIZE)
152  bench_new(src_pixels, dst1, dstW, dither, offset);
153  }
154  }
155  }
157 }
158 
159 static void check_yuv2yuvX(int accurate)
160 {
161  struct SwsContext *ctx;
162  int fsi, osi, isi, i, j;
163  int dstW;
164 #define LARGEST_FILTER 16
165  // ff_yuv2planeX_8_sse2 can't handle odd filter sizes
166  const int filter_sizes[] = {2, 4, 8, 16};
167  const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
168 #define LARGEST_INPUT_SIZE 512
169  static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
170  const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
171  const char *accurate_str = (accurate) ? "accurate" : "approximate";
172 
173  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
174  int filterSize, const int16_t **src, uint8_t *dest,
175  int dstW, const uint8_t *dither, int offset);
176 
177  const int16_t **src;
178  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
179  LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
180  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
181  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
183  union VFilterData{
184  const int16_t *src;
185  uint16_t coeff[8];
186  } *vFilterData;
187  uint8_t d_val = rnd();
188  memset(dither, d_val, LARGEST_INPUT_SIZE);
189  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
191  if (accurate)
193  if (sws_init_context(ctx, NULL, NULL) < 0)
194  fail();
195 
197  for(isi = 0; isi < INPUT_SIZES; ++isi){
198  dstW = input_sizes[isi];
199  for(osi = 0; osi < 64; osi += 16){
200  if (dstW <= osi)
201  continue;
202  for (fsi = 0; fsi < FILTER_SIZES; ++fsi) {
203  // Generate filter coefficients for the given filter size,
204  // with some properties:
205  // - The coefficients add up to the intended sum (4096, 1<<12)
206  // - The coefficients contain negative values
207  // - The filter intermediates don't overflow for worst case
208  // inputs (all positive coefficients are coupled with
209  // input_max and all negative coefficients with input_min,
210  // or vice versa).
211  // Produce a filter with all coefficients set to
212  // -((1<<12)/(filter_size-1)) except for one (randomly chosen)
213  // which is set to ((1<<13)-1).
214  for (i = 0; i < filter_sizes[fsi]; ++i)
215  filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1));
216  filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1;
217 
218  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
219  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
220  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
221  for (i = 0; i < filter_sizes[fsi]; ++i) {
222  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
223  vFilterData[i].src = src[i] - osi;
224  for(j = 0; j < 4; ++j)
225  vFilterData[i].coeff[j + 4] = filter_coeff[i];
226  }
227  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d_%s", filter_sizes[fsi], osi, dstW, accurate_str)){
228  // use vFilterData for the mmx function
229  const int16_t *filter = ctx->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];
230  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
231  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
232 
233  // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that
234  // function or not, so we can't pass it the parameters correctly.
235  yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
236 
237  call_new(filter, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
238  if (cmp_off_by_n(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]), accurate ? 0 : 2)) {
239  fail();
240  printf("failed: yuv2yuvX_%d_%d_%d_%s\n", filter_sizes[fsi], osi, dstW, accurate_str);
241  show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
242  }
243  if(dstW == LARGEST_INPUT_SIZE)
244  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
245 
246  }
247  av_freep(&src);
248  av_freep(&vFilterData);
249  }
250  }
251  }
253 #undef FILTER_SIZES
254 }
255 
256 #undef SRC_PIXELS
257 #define SRC_PIXELS 512
258 
259 static void check_hscale(void)
260 {
261 #define MAX_FILTER_WIDTH 40
262 #define FILTER_SIZES 6
263  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
264 
265 #define HSCALE_PAIRS 2
266  static const int hscale_pairs[HSCALE_PAIRS][2] = {
267  { 8, 14 },
268  { 8, 18 },
269  };
270 
271 #define LARGEST_INPUT_SIZE 512
272 #define INPUT_SIZES 6
273  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
274 
275  int i, j, fsi, hpi, width, dstWi;
276  struct SwsContext *ctx;
277 
278  // padded
279  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
280  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
281  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
282 
283  // padded
285  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
286  LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
287  LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
288 
289  // The dst parameter here is either int16_t or int32_t but we use void* to
290  // just cover both cases.
291  declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
292  const uint8_t *src, const int16_t *filter,
293  const int32_t *filterPos, int filterSize);
294 
296  if (sws_init_context(ctx, NULL, NULL) < 0)
297  fail();
298 
300 
301  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
302  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
303  for (dstWi = 0; dstWi < INPUT_SIZES; dstWi++) {
304  width = filter_sizes[fsi];
305 
306  ctx->srcBpc = hscale_pairs[hpi][0];
307  ctx->dstBpc = hscale_pairs[hpi][1];
308  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
309 
310  for (i = 0; i < SRC_PIXELS; i++) {
311  filterPos[i] = i;
312  filterPosAvx[i] = i;
313 
314  // These filter cofficients are chosen to try break two corner
315  // cases, namely:
316  //
317  // - Negative filter coefficients. The filters output signed
318  // values, and it should be possible to end up with negative
319  // output values.
320  //
321  // - Positive clipping. The hscale filter function has clipping
322  // at (1<<15) - 1
323  //
324  // The coefficients sum to the 1.0 point for the hscale
325  // functions (1 << 14).
326 
327  for (j = 0; j < width; j++) {
328  filter[i * width + j] = -((1 << 14) / (width - 1));
329  }
330  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
331  }
332 
333  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
334  // These values should be unused in SIMD implementations but
335  // may still be read, random coefficients here should help show
336  // issues where they are used in error.
337 
338  filter[SRC_PIXELS * width + i] = rnd();
339  }
340  ctx->dstW = ctx->chrDstW = input_sizes[dstWi];
342  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
343  ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW);
344 
345  if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {
346  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
347  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
348 
349  call_ref(NULL, dst0, ctx->dstW, src, filter, filterPos, width);
350  call_new(NULL, dst1, ctx->dstW, src, filterAvx2, filterPosAvx, width);
351  if (memcmp(dst0, dst1, ctx->dstW * sizeof(dst0[0])))
352  fail();
353  bench_new(NULL, dst0, ctx->dstW, src, filter, filterPosAvx, width);
354  }
355  }
356  }
357  }
359 }
360 
/*
 * Entry point for the sw_scale checks: runs each group of checks and emits
 * one report per group. The yuv2yuv1 and yuv2yuvX groups run twice, first in
 * approximate mode (argument 0) and then in accurate mode (argument 1).
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuv1(0);
    check_yuv2yuv1(1);
    report("yuv2yuv1");
    check_yuv2yuvX(0);
    check_yuv2yuvX(1);
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:131
check_yuv2yuv1
static void check_yuv2yuv1(int accurate)
Definition: sw_scale.c:99
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:514
mem_internal.h
check_func
#define check_func(func,...)
Definition: checkasm.h:125
b
#define b
Definition: input.c:41
test
Definition: idctdsp.c:34
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
call_ref
#define call_ref(...)
Definition: checkasm.h:140
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
print_data
static void print_data(uint8_t *p, size_t len, size_t offset)
Definition: sw_scale.c:63
fail
#define fail()
Definition: checkasm.h:134
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:77
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:259
rnd
#define rnd()
Definition: checkasm.h:118
width
#define width
intreadwrite.h
offsets
static const int offsets[]
Definition: hevc_pel.c:34
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1222
LARGEST_FILTER
#define LARGEST_FILTER
cmp_off_by_n
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
Definition: sw_scale.c:54
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:129
ctx
AVFormatContext * ctx
Definition: movenc.c:48
yuv2planeX_8_ref
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:38
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:123
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:257
call_new
#define call_new(...)
Definition: checkasm.h:222
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:135
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1176
abs
#define abs(x)
Definition: cuda_runtime.h:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_init_scale
void ff_sws_init_scale(SwsContext *c)
Definition: swscale.c:589
ff_shuffle_filter_coefficients
int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)
Definition: utils.c:266
check_yuv2yuvX
static void check_yuv2yuvX(int accurate)
Definition: sw_scale.c:159
printf
printf("static const uint8_t my_array[100] = {\n")
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:90
show_differences
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
Definition: sw_scale.c:80
report
#define report
Definition: checkasm.h:137
bench_new
#define bench_new(...)
Definition: checkasm.h:287
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
len
int len
Definition: vorbis_enc_data.h:426
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:31
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:2026
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2415
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:112
av_clip_uint8
#define av_clip_uint8
Definition: common.h:101
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:78
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:361
SwsContext
Definition: swscale_internal.h:299
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58