FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
31 
/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written four
 * bytes at a time with AV_WN32. The last store covers bytes up to the next
 * multiple of 4, so the buffer must be padded accordingly when `size` is not
 * a multiple of 4.
 *
 * Both arguments are parenthesized so that compound expressions (e.g. a
 * conditional) expand with the intended precedence.
 */
#define randomize_buffers(buf, size)                    \
    do {                                                \
        int j;                                          \
        for (j = 0; j < (size); j += 4)                 \
            AV_WN32((buf) + j, rnd());                  \
    } while (0)
38 
/*
 * Plain C reference for the vertical scaler; it corresponds to the
 * yuv2planeX_8_c function.
 *
 * For every output pixel x in [0, dstW) the dither value selected by
 * (x + offset) & 7 is shifted left by 12, the products src[tap][x] *
 * filter[tap] for all filterSize taps are added, and the sum shifted right
 * by 19 is stored in dest[x] after clipping with av_clip_uint8().
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        /* start the accumulator from the dither term */
        int acc = dither[(x + offset) & 7] << 12;

        for (int tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
54 
/*
 * Compare two byte buffers with a tolerance.
 *
 * Returns 1 as soon as any of the first n bytes of ref and test differ by
 * more than `accuracy` in absolute value, and 0 if all of them are within
 * the tolerance (including when n is 0).
 */
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
{
    while (n--) {
        /* bytes promote to int, so the difference lies in [-255, 255] */
        int delta = *ref++ - *test++;

        if (delta < 0)
            delta = -delta;
        if (delta > accuracy)
            return 1;
    }
    return 0;
}
63 
/*
 * Hex-dump len bytes starting at p to stdout, eight bytes per row.
 *
 * Every row starts with the position of its first byte plus `offset`
 * (so the caller can label the dump with positions in a larger buffer)
 * and ends with a newline, including a final partial row.
 */
static void print_data(uint8_t *p, size_t len, size_t offset)
{
    for (size_t row = 0; row < len; row += 8) {
        const size_t left  = len - row;
        const size_t count = left < 8 ? left : 8;

        printf("0x%04zx: ", row+offset);
        for (size_t k = 0; k < count; k++)
            printf("0x%02x ", (uint32_t) p[row + k]);
        printf("\n");
    }
}
80 
/*
 * Locate the first position where buffers a and b differ and hex-dump the
 * surrounding bytes of both buffers to stdout.
 *
 * The dump starts at the 8-byte aligned position at or before
 * (mismatch - 8) and covers at most 32 bytes; it is truncated at len so
 * that neither buffer is read past its end.
 *
 * Returns the index of the first differing byte, or len when the first
 * len bytes of both buffers are identical (nothing is printed then).
 */
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (a[i] != b[i]) {
            /* back up one row when possible, then align down to a row start */
            size_t offset = (i >= 8 ? i - 8 : i) & ~(size_t)7;
            /* never dump beyond the end of the buffers */
            size_t count  = len - offset;

            if (count > 32)
                count = 32;

            printf("test a:\n");
            print_data(&a[offset], count, offset);
            printf("\ntest b:\n");
            print_data(&b[offset], count, offset);
            printf("\n");
            return i;
        }
    }
    return len;
}
99 
100 static void check_yuv2yuv1(int accurate)
101 {
102  struct SwsContext *ctx;
103  int osi, isi;
104  int dstW, offset;
105  size_t fail_offset;
106  const int input_sizes[] = {8, 24, 128, 144, 256, 512};
107  const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
108  #define LARGEST_INPUT_SIZE 512
109 
110  const int offsets[] = {0, 3, 8, 11, 16, 19};
111  const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);
112  const char *accurate_str = (accurate) ? "accurate" : "approximate";
113 
114  declare_func(void,
115  const int16_t *src, uint8_t *dest,
116  int dstW, const uint8_t *dither, int offset);
117 
118  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);
119  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
120  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
121  LOCAL_ALIGNED_8(uint8_t, dither, [8]);
122 
123  randomize_buffers((uint8_t*)dither, 8);
124  randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
126  if (accurate)
128  if (sws_init_context(ctx, NULL, NULL) < 0)
129  fail();
130 
132  for (isi = 0; isi < INPUT_SIZES; ++isi) {
133  dstW = input_sizes[isi];
134  for (osi = 0; osi < OFFSET_SIZES; osi++) {
135  offset = offsets[osi];
136  if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){
137  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
138  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
139 
140  call_ref(src_pixels, dst0, dstW, dither, offset);
141  call_new(src_pixels, dst1, dstW, dither, offset);
142  if (cmp_off_by_n(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {
143  fail();
144  printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str);
145  fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
146  printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
147  (int) src_pixels[fail_offset],
148  (int) dither[(fail_offset + fail_offset) & 7],
149  (int) dst0[fail_offset],
150  (int) dst1[fail_offset]);
151  }
152  if(dstW == LARGEST_INPUT_SIZE)
153  bench_new(src_pixels, dst1, dstW, dither, offset);
154  }
155  }
156  }
158 }
159 
160 static void check_yuv2yuvX(int accurate)
161 {
162  struct SwsContext *ctx;
163  int fsi, osi, isi, i, j;
164  int dstW;
165 #define LARGEST_FILTER 16
166  // ff_yuv2planeX_8_sse2 can't handle odd filter sizes
167  const int filter_sizes[] = {2, 4, 8, 16};
168  const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
169 #define LARGEST_INPUT_SIZE 512
170  static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
171  const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
172  const char *accurate_str = (accurate) ? "accurate" : "approximate";
173 
174  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
175  int filterSize, const int16_t **src, uint8_t *dest,
176  int dstW, const uint8_t *dither, int offset);
177 
178  const int16_t **src;
179  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
180  LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
181  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
182  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
184  union VFilterData{
185  const int16_t *src;
186  uint16_t coeff[8];
187  } *vFilterData;
188  uint8_t d_val = rnd();
189  memset(dither, d_val, LARGEST_INPUT_SIZE);
190  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
192  if (accurate)
194  if (sws_init_context(ctx, NULL, NULL) < 0)
195  fail();
196 
198  for(isi = 0; isi < INPUT_SIZES; ++isi){
199  dstW = input_sizes[isi];
200  for(osi = 0; osi < 64; osi += 16){
201  if (dstW <= osi)
202  continue;
203  for (fsi = 0; fsi < FILTER_SIZES; ++fsi) {
204  // Generate filter coefficients for the given filter size,
205  // with some properties:
206  // - The coefficients add up to the intended sum (4096, 1<<12)
207  // - The coefficients contain negative values
208  // - The filter intermediates don't overflow for worst case
209  // inputs (all positive coefficients are coupled with
210  // input_max and all negative coefficients with input_min,
211  // or vice versa).
212  // Produce a filter with all coefficients set to
213  // -((1<<12)/(filter_size-1)) except for one (randomly chosen)
214  // which is set to ((1<<13)-1).
215  for (i = 0; i < filter_sizes[fsi]; ++i)
216  filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1));
217  filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1;
218 
219  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
220  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
221  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
222  for (i = 0; i < filter_sizes[fsi]; ++i) {
223  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
224  vFilterData[i].src = src[i] - osi;
225  for(j = 0; j < 4; ++j)
226  vFilterData[i].coeff[j + 4] = filter_coeff[i];
227  }
228  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d_%s", filter_sizes[fsi], osi, dstW, accurate_str)){
229  // use vFilterData for the mmx function
230  const int16_t *filter = ctx->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];
231  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
232  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
233 
234  // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that
235  // function or not, so we can't pass it the parameters correctly.
236  yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
237 
238  call_new(filter, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
239  if (cmp_off_by_n(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]), accurate ? 0 : 2)) {
240  fail();
241  printf("failed: yuv2yuvX_%d_%d_%d_%s\n", filter_sizes[fsi], osi, dstW, accurate_str);
242  show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
243  }
244  if(dstW == LARGEST_INPUT_SIZE)
245  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
246 
247  }
248  av_freep(&src);
249  av_freep(&vFilterData);
250  }
251  }
252  }
254 #undef FILTER_SIZES
255 }
256 
257 #undef SRC_PIXELS
258 #define SRC_PIXELS 512
259 
260 static void check_hscale(void)
261 {
262 #define MAX_FILTER_WIDTH 40
263 #define FILTER_SIZES 6
264  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
265 
266 #define HSCALE_PAIRS 2
267  static const int hscale_pairs[HSCALE_PAIRS][2] = {
268  { 8, 14 },
269  { 8, 18 },
270  };
271 
272 #define LARGEST_INPUT_SIZE 512
273 #define INPUT_SIZES 6
274  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
275 
276  int i, j, fsi, hpi, width, dstWi;
277  struct SwsContext *ctx;
278 
279  // padded
280  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
281  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
282  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
283 
284  // padded
286  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
287  LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
288  LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
289 
290  // The dst parameter here is either int16_t or int32_t but we use void* to
291  // just cover both cases.
292  declare_func(void, void *c, void *dst, int dstW,
293  const uint8_t *src, const int16_t *filter,
294  const int32_t *filterPos, int filterSize);
295 
297  if (sws_init_context(ctx, NULL, NULL) < 0)
298  fail();
299 
301 
302  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
303  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
304  for (dstWi = 0; dstWi < INPUT_SIZES; dstWi++) {
305  width = filter_sizes[fsi];
306 
307  ctx->srcBpc = hscale_pairs[hpi][0];
308  ctx->dstBpc = hscale_pairs[hpi][1];
309  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
310 
311  for (i = 0; i < SRC_PIXELS; i++) {
312  filterPos[i] = i;
313  filterPosAvx[i] = i;
314 
315  // These filter cofficients are chosen to try break two corner
316  // cases, namely:
317  //
318  // - Negative filter coefficients. The filters output signed
319  // values, and it should be possible to end up with negative
320  // output values.
321  //
322  // - Positive clipping. The hscale filter function has clipping
323  // at (1<<15) - 1
324  //
325  // The coefficients sum to the 1.0 point for the hscale
326  // functions (1 << 14).
327 
328  for (j = 0; j < width; j++) {
329  filter[i * width + j] = -((1 << 14) / (width - 1));
330  }
331  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
332  }
333 
334  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
335  // These values should be unused in SIMD implementations but
336  // may still be read, random coefficients here should help show
337  // issues where they are used in error.
338 
339  filter[SRC_PIXELS * width + i] = rnd();
340  }
341  ctx->dstW = ctx->chrDstW = input_sizes[dstWi];
343  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
344  ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW);
345 
346  if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {
347  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
348  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
349 
350  call_ref(NULL, dst0, ctx->dstW, src, filter, filterPos, width);
351  call_new(NULL, dst1, ctx->dstW, src, filterAvx2, filterPosAvx, width);
352  if (memcmp(dst0, dst1, ctx->dstW * sizeof(dst0[0])))
353  fail();
354  bench_new(NULL, dst0, ctx->dstW, src, filter, filterPosAvx, width);
355  }
356  }
357  }
358  }
360 }
361 
/*
 * Entry point of the sw_scale tests: runs the horizontal scaler check, then
 * the yuv2yuv1 and yuv2yuvX checks (each first with accurate = 0, then with
 * accurate = 1), issuing one report() per group.
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuv1(0);
    check_yuv2yuv1(1);
    report("yuv2yuv1");
    check_yuv2yuvX(0);
    check_yuv2yuvX(1);
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:176
check_yuv2yuv1
static void check_yuv2yuv1(int accurate)
Definition: sw_scale.c:100
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:516
mem_internal.h
check_func
#define check_func(func,...)
Definition: checkasm.h:170
b
#define b
Definition: input.c:41
test
Definition: idctdsp.c:35
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
call_ref
#define call_ref(...)
Definition: checkasm.h:185
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
print_data
static void print_data(uint8_t *p, size_t len, size_t offset)
Definition: sw_scale.c:64
fail
#define fail()
Definition: checkasm.h:179
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:78
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:260
rnd
#define rnd()
Definition: checkasm.h:163
width
#define width
intreadwrite.h
offsets
static const int offsets[]
Definition: hevc_pel.c:34
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1406
LARGEST_FILTER
#define LARGEST_FILTER
cmp_off_by_n
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
Definition: sw_scale.c:55
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
ctx
AVFormatContext * ctx
Definition: movenc.c:49
yuv2planeX_8_ref
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:39
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:144
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:258
call_new
#define call_new(...)
Definition: checkasm.h:288
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1213
abs
#define abs(x)
Definition: cuda_runtime.h:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_init_scale
void ff_sws_init_scale(SwsContext *c)
Definition: swscale.c:591
ff_shuffle_filter_coefficients
int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)
Definition: utils.c:301
check_yuv2yuvX
static void check_yuv2yuvX(int accurate)
Definition: sw_scale.c:160
printf
printf("static const uint8_t my_array[100] = {\n")
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:90
show_differences
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
Definition: sw_scale.c:81
report
#define report
Definition: checkasm.h:182
bench_new
#define bench_new(...)
Definition: checkasm.h:358
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
len
int len
Definition: vorbis_enc_data.h:426
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:32
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:2069
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2433
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:112
av_clip_uint8
#define av_clip_uint8
Definition: common.h:105
mem.h
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:174
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:80
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:362
SwsContext
Definition: swscale_internal.h:301
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:61