FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
30 
/*
 * Fill a buffer with values from rnd(), one AV_WN32() store every four
 * elements of buf (callers in this file pass a byte pointer and a size in
 * bytes).
 *
 * The loop steps by four, so when size is not a multiple of four the last
 * store extends past buf + size; callers must pad the buffer accordingly.
 * Both arguments are parenthesized so that expression arguments expand
 * safely.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
37 
// This reference function is the same approximate algorithm employed by the
// SIMD functions
/*
 * Vertical filter reference: for each of the dstW output pixels, accumulate
 * the high 16 bits of src[j][i + offset] * filter[j] over all filterSize
 * taps in a 16-bit accumulator, then shift right by 3 and clip to 8 bits.
 *
 * filter     - filterSize coefficients
 * filterSize - number of taps / number of rows in src
 * src        - filterSize row pointers; each row is read at [offset, offset + dstW)
 * dest       - receives dstW output bytes
 * dither     - only dither[0] is read; it seeds the rounding bias
 * offset     - horizontal offset applied to the source rows only
 */
static void ref_function(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset)
{
    int i, j;
    const int d = ((filterSize - 1) * 8 + dither[0]) >> 4;

    for (i = 0; i < dstW; i++) {
        /* Deliberately 16 bits wide: the running sum is truncated to
         * int16_t after every tap. */
        int16_t val = d;

        for (j = 0; j < filterSize; j++) {
            const int prod = (int)src[j][i + offset] * (int)filter[j];

            /* Take the upper half of the 32-bit product with an explicit
             * shift instead of reading one half of a union, so that the
             * result does not depend on host byte order. */
            val += (int16_t)(prod >> 16);
        }
        dest[i] = av_clip_uint8(val >> 3);
    }
}
60 
61 static void check_yuv2yuvX(void)
62 {
63  struct SwsContext *ctx;
64  int fsi, osi, isi, i, j;
65  int dstW;
66 #define LARGEST_FILTER 16
67 #define FILTER_SIZES 4
68  static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
69 #define LARGEST_INPUT_SIZE 512
70 #define INPUT_SIZES 6
71  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
72 
73  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
74  int filterSize, const int16_t **src, uint8_t *dest,
75  int dstW, const uint8_t *dither, int offset);
76 
77  const int16_t **src;
78  LOCAL_ALIGNED_8(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
79  LOCAL_ALIGNED_8(int16_t, filter_coeff, [LARGEST_FILTER]);
80  LOCAL_ALIGNED_8(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
81  LOCAL_ALIGNED_8(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
83  union VFilterData{
84  const int16_t *src;
85  uint16_t coeff[8];
86  } *vFilterData;
87  uint8_t d_val = rnd();
88  memset(dither, d_val, LARGEST_INPUT_SIZE);
89  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
90  randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
92  if (sws_init_context(ctx, NULL, NULL) < 0)
93  fail();
94 
96  for(isi = 0; isi < INPUT_SIZES; ++isi){
97  dstW = input_sizes[isi];
98  for(osi = 0; osi < 64; osi += 16){
99  for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
100  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
101  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
102  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
103  for(i = 0; i < filter_sizes[fsi]; ++i){
104  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
105  vFilterData[i].src = src[i];
106  for(j = 0; j < 4; ++j)
107  vFilterData[i].coeff[j + 4] = filter_coeff[i];
108  }
109  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){
110  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
111  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
112 
113  // The reference function is not the scalar function selected when mmx
114  // is deactivated as the SIMD functions do not give the same result as
115  // the scalar ones due to rounding. The SIMD functions are activated by
116  // the flag SWS_ACCURATE_RND
117  ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
118  // There's no point in calling new for the reference function
119  if(ctx->use_mmx_vfilter){
120  call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
121  if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
122  fail();
123  if(dstW == LARGEST_INPUT_SIZE)
124  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
125  }
126  }
127  av_freep(&src);
128  av_freep(&vFilterData);
129  }
130  }
131  }
133 #undef FILTER_SIZES
134 }
135 
136 #undef SRC_PIXELS
137 #define SRC_PIXELS 512
138 
139 static void check_hscale(void)
140 {
141 #define MAX_FILTER_WIDTH 40
142 #define FILTER_SIZES 6
143  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
144 
145 #define HSCALE_PAIRS 2
146  static const int hscale_pairs[HSCALE_PAIRS][2] = {
147  { 8, 14 },
148  { 8, 18 },
149  };
150 
151  int i, j, fsi, hpi, width;
152  struct SwsContext *ctx;
153 
154  // padded
155  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
156  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
157  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
158 
159  // padded
161  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
162  LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
163  LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
164 
165  // The dst parameter here is either int16_t or int32_t but we use void* to
166  // just cover both cases.
167  declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
168  const uint8_t *src, const int16_t *filter,
169  const int32_t *filterPos, int filterSize);
170 
171  int cpu_flags = av_get_cpu_flags();
172 
174  if (sws_init_context(ctx, NULL, NULL) < 0)
175  fail();
176 
178 
179  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
180  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
181  width = filter_sizes[fsi];
182 
183  ctx->srcBpc = hscale_pairs[hpi][0];
184  ctx->dstBpc = hscale_pairs[hpi][1];
185  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
186  ctx->dstW = ctx->chrDstW = SRC_PIXELS;
187 
188  for (i = 0; i < SRC_PIXELS; i++) {
189  filterPos[i] = i;
190  filterPosAvx[i] = i;
191 
192  // These filter cofficients are chosen to try break two corner
193  // cases, namely:
194  //
195  // - Negative filter coefficients. The filters output signed
196  // values, and it should be possible to end up with negative
197  // output values.
198  //
199  // - Positive clipping. The hscale filter function has clipping
200  // at (1<<15) - 1
201  //
202  // The coefficients sum to the 1.0 point for the hscale
203  // functions (1 << 14).
204 
205  for (j = 0; j < width; j++) {
206  filter[i * width + j] = -((1 << 14) / (width - 1));
207  }
208  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
209  }
210 
211  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
212  // These values should be unused in SIMD implementations but
213  // may still be read, random coefficients here should help show
214  // issues where they are used in error.
215 
216  filter[SRC_PIXELS * width + i] = rnd();
217  }
219  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
221  ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
222 
223  if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
224  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
225  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
226 
227  call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
228  call_new(NULL, dst1, SRC_PIXELS, src, filterAvx2, filterPosAvx, width);
229  if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
230  fail();
231  bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPosAvx, width);
232  }
233  }
234  }
236 }
237 
/*
 * checkasm entry point for the swscale tests: runs the horizontal scaler
 * checks and the yuv2yuvX vertical filter checks, reporting each group
 * under its own name.
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuvX();
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:125
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:515
mem_internal.h
check_yuv2yuvX
static void check_yuv2yuvX(void)
Definition: sw_scale.c:61
check_func
#define check_func(func,...)
Definition: checkasm.h:119
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:98
call_ref
#define call_ref(...)
Definition: checkasm.h:134
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
fail
#define fail()
Definition: checkasm.h:128
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:76
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:139
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:57
rnd
#define rnd()
Definition: checkasm.h:112
width
#define width
intreadwrite.h
LARGEST_FILTER
#define LARGEST_FILTER
ctx
AVFormatContext * ctx
Definition: movenc.c:48
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:124
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:137
call_new
#define call_new(...)
Definition: checkasm.h:206
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1154
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_init_scale
void ff_sws_init_scale(SwsContext *c)
Definition: swscale.c:593
AV_CPU_FLAG_AVX2
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:52
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:131
bench_new
#define bench_new(...)
Definition: checkasm.h:269
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:31
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:1296
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2383
ref_function
static void ref_function(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:40
av_clip_uint8
#define av_clip_uint8
Definition: common.h:102
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
d
d
Definition: ffmpeg_filter.c:153
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:78
ff_shuffle_filter_coefficients
void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)
Definition: utils.c:281
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:238
SwsContext
Definition: swscale_internal.h:300
int
int
Definition: ffmpeg_filter.c:153
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58