FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
30 
/*
 * Fill the first `size` bytes of `buf` with values drawn from rnd().
 *
 * The data is stored four bytes at a time with AV_WN32(), so when `size` is
 * not a multiple of 4 the last store runs up to three bytes past `size`;
 * callers must pad their buffers accordingly.
 *
 * Both arguments are parenthesized inside the expansion so that arbitrary
 * expressions can be passed. They are still evaluated more than once, so
 * they must not have side effects.
 */
#define randomize_buffers(buf, size)          \
    do {                                      \
        int j;                                \
        for (j = 0; j < (size); j += 4)       \
            AV_WN32((buf) + j, rnd());        \
    } while (0)
37 
/**
 * Scalar reference for the vertical scaler under test.
 *
 * This reference function is the same approximate algorithm employed by the
 * SIMD functions: only the upper 16 bits of each 32-bit sample * coefficient
 * product are accumulated, the sum is shifted right by 3 and clipped to
 * 8 bits.
 *
 * @param filter     filterSize coefficients, one per source line
 * @param filterSize number of filter taps (and of source lines)
 * @param src        filterSize line pointers; samples offset .. offset+dstW-1
 *                   of every line are read
 * @param dest       output buffer, dstW bytes are written
 * @param dstW       number of output pixels
 * @param dither     dither table; only dither[0] is used here
 * @param offset     index of the first sample read from each source line
 */
static void ref_function(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset)
{
    int i, d;

    // Rounding bias, identical for every output pixel.
    d = ((filterSize - 1) * 8 + dither[0]) >> 4;
    for (i = 0; i < dstW; i++) {
        // NOTE(review): the accumulator is kept 16 bits wide as in the
        // original reference; presumably this mirrors the SIMD lane width,
        // so do not widen it without confirming.
        int16_t val = d;
        int j;

        for (j = 0; j < filterSize; j++) {
            int prod = (int)src[j][i + offset] * (int)filter[j];

            // Keep the upper half of the product. A shift is used instead of
            // reading half of an int through a union so that the result does
            // not depend on the byte order of the host.
            val += prod >> 16;
        }
        dest[i] = av_clip_uint8(val >> 3);
    }
}
60 
61 static void check_yuv2yuvX(void)
62 {
63  struct SwsContext *ctx;
64  int fsi, osi, isi, i, j;
65  int dstW;
66 #define LARGEST_FILTER 16
67 #define FILTER_SIZES 4
68  static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
69 #define LARGEST_INPUT_SIZE 512
70 #define INPUT_SIZES 6
71  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
72 
73  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
74  int filterSize, const int16_t **src, uint8_t *dest,
75  int dstW, const uint8_t *dither, int offset);
76 
77  const int16_t **src;
78  LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
79  LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);
80  LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
81  LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
83  union VFilterData{
84  const int16_t *src;
85  uint16_t coeff[8];
86  } *vFilterData;
87  uint8_t d_val = rnd();
88  memset(dither, d_val, LARGEST_INPUT_SIZE);
89  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
90  randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
92  if (sws_init_context(ctx, NULL, NULL) < 0)
93  fail();
94 
96  for(isi = 0; isi < INPUT_SIZES; ++isi){
97  dstW = input_sizes[isi];
98  for(osi = 0; osi < 64; osi += 16){
99  for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
100  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
101  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
102  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
103  for(i = 0; i < filter_sizes[fsi]; ++i){
104  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
105  vFilterData[i].src = src[i];
106  for(j = 0; j < 4; ++j)
107  vFilterData[i].coeff[j + 4] = filter_coeff[i];
108  }
109  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){
110  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
111  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
112 
113  // The reference function is not the scalar function selected when mmx
114  // is deactivated as the SIMD functions do not give the same result as
115  // the scalar ones due to rounding. The SIMD functions are activated by
116  // the flag SWS_ACCURATE_RND
117  ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
118  // There's no point in calling new for the reference function
119  if(ctx->use_mmx_vfilter){
120  call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
121  if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
122  fail();
123  if(dstW == LARGEST_INPUT_SIZE)
124  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
125  }
126  }
127  av_freep(&src);
128  av_freep(&vFilterData);
129  }
130  }
131  }
133 #undef FILTER_SIZES
134 }
135 
136 #undef SRC_PIXELS
137 #define SRC_PIXELS 512
138 
139 static void check_hscale(void)
140 {
141 #define MAX_FILTER_WIDTH 40
142 #define FILTER_SIZES 6
143  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };
144 
145 #define HSCALE_PAIRS 2
146  static const int hscale_pairs[HSCALE_PAIRS][2] = {
147  { 8, 14 },
148  { 8, 18 },
149  };
150 
151 #define LARGEST_INPUT_SIZE 512
152 #define INPUT_SIZES 6
153  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
154 
155  int i, j, fsi, hpi, width, dstWi;
156  struct SwsContext *ctx;
157 
158  // padded
159  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
160  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
161  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
162 
163  // padded
165  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
166  LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);
167  LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);
168 
169  // The dst parameter here is either int16_t or int32_t but we use void* to
170  // just cover both cases.
171  declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
172  const uint8_t *src, const int16_t *filter,
173  const int32_t *filterPos, int filterSize);
174 
175  int cpu_flags = av_get_cpu_flags();
176 
178  if (sws_init_context(ctx, NULL, NULL) < 0)
179  fail();
180 
182 
183  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
184  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
185  for (dstWi = 0; dstWi < INPUT_SIZES; dstWi++) {
186  width = filter_sizes[fsi];
187 
188  ctx->srcBpc = hscale_pairs[hpi][0];
189  ctx->dstBpc = hscale_pairs[hpi][1];
190  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
191 
192  for (i = 0; i < SRC_PIXELS; i++) {
193  filterPos[i] = i;
194  filterPosAvx[i] = i;
195 
196  // These filter cofficients are chosen to try break two corner
197  // cases, namely:
198  //
199  // - Negative filter coefficients. The filters output signed
200  // values, and it should be possible to end up with negative
201  // output values.
202  //
203  // - Positive clipping. The hscale filter function has clipping
204  // at (1<<15) - 1
205  //
206  // The coefficients sum to the 1.0 point for the hscale
207  // functions (1 << 14).
208 
209  for (j = 0; j < width; j++) {
210  filter[i * width + j] = -((1 << 14) / (width - 1));
211  }
212  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
213  }
214 
215  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
216  // These values should be unused in SIMD implementations but
217  // may still be read, random coefficients here should help show
218  // issues where they are used in error.
219 
220  filter[SRC_PIXELS * width + i] = rnd();
221  }
222  ctx->dstW = ctx->chrDstW = input_sizes[dstWi];
224  memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
226  ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
227 
228  if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {
229  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
230  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
231 
232  call_ref(NULL, dst0, ctx->dstW, src, filter, filterPos, width);
233  call_new(NULL, dst1, ctx->dstW, src, filterAvx2, filterPosAvx, width);
234  if (memcmp(dst0, dst1, ctx->dstW * sizeof(dst0[0])))
235  fail();
236  bench_new(NULL, dst0, ctx->dstW, src, filter, filterPosAvx, width);
237  }
238  }
239  }
240  }
242 }
243 
/**
 * Entry point of the sw_scale checkasm test: runs the horizontal scaler
 * checks and then the yuv2yuvX checks, reporting each group separately.
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuvX();
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:128
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:513
mem_internal.h
check_yuv2yuvX
static void check_yuv2yuvX(void)
Definition: sw_scale.c:61
check_func
#define check_func(func,...)
Definition: checkasm.h:122
filter
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs output only at least one per link. The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:101
call_ref
#define call_ref(...)
Definition: checkasm.h:137
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
fail
#define fail()
Definition: checkasm.h:131
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:77
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:139
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:58
rnd
#define rnd()
Definition: checkasm.h:115
width
#define width
intreadwrite.h
LARGEST_FILTER
#define LARGEST_FILTER
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
ctx
AVFormatContext * ctx
Definition: movenc.c:48
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:137
call_new
#define call_new(...)
Definition: checkasm.h:209
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1150
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ... c
Definition: undefined.txt:32
ff_sws_init_scale
void ff_sws_init_scale(SwsContext *c)
Definition: swscale.c:589
ff_shuffle_filter_coefficients
int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)
Definition: utils.c:262
AV_CPU_FLAG_AVX2
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:52
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:134
bench_new
#define bench_new(...)
Definition: checkasm.h:272
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:31
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:1292
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2381
ref_function
static void ref_function(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:40
av_clip_uint8
#define av_clip_uint8
Definition: common.h:101
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
d
d
Definition: ffmpeg_filter.c:153
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:78
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:244
SwsContext
Definition: swscale_internal.h:298
int
int
Definition: ffmpeg_filter.c:153
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58