FFmpeg
float_dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <float.h>
20 #include <stdint.h>
21 
22 #include "libavutil/float_dsp.h"
23 #include "libavutil/internal.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 
27 #include "checkasm.h"
28 
29 #define LEN 256
30 
31 static void test_vector_fmul(const float *src0, const float *src1)
32 {
33  LOCAL_ALIGNED_32(float, cdst, [LEN]);
34  LOCAL_ALIGNED_32(float, odst, [LEN]);
35  int i;
36 
37  declare_func(void, float *dst, const float *src0, const float *src1,
38  int len);
39 
40  call_ref(cdst, src0, src1, LEN);
41  call_new(odst, src0, src1, LEN);
42  for (i = 0; i < LEN; i++) {
43  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
44  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
45  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
46  i, cdst[i], odst[i], cdst[i] - odst[i]);
47  fail();
48  break;
49  }
50  }
51  bench_new(odst, src0, src1, LEN);
52 }
53 
54 static void test_vector_dmul(const double *src0, const double *src1)
55 {
56  LOCAL_ALIGNED_32(double, cdst, [LEN]);
57  LOCAL_ALIGNED_32(double, odst, [LEN]);
58  int i;
59 
60  declare_func(void, double *dst, const double *src0, const double *src1,
61  int len);
62 
63  call_ref(cdst, src0, src1, LEN);
64  call_new(odst, src0, src1, LEN);
65  for (i = 0; i < LEN; i++) {
66  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
67  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
68  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
69  i, cdst[i], odst[i], cdst[i] - odst[i]);
70  fail();
71  break;
72  }
73  }
74  bench_new(odst, src0, src1, LEN);
75 }
76 
77 #define ARBITRARY_FMUL_ADD_CONST 0.005
78 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
79 {
80  LOCAL_ALIGNED_32(float, cdst, [LEN]);
81  LOCAL_ALIGNED_32(float, odst, [LEN]);
82  int i;
83 
84  declare_func(void, float *dst, const float *src0, const float *src1,
85  const float *src2, int len);
86 
87  call_ref(cdst, src0, src1, src2, LEN);
88  call_new(odst, src0, src1, src2, LEN);
89  for (i = 0; i < LEN; i++) {
90  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
91  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
92  i, cdst[i], odst[i], cdst[i] - odst[i]);
93  fail();
94  break;
95  }
96  }
97  bench_new(odst, src0, src1, src2, LEN);
98 }
99 
100 static void test_vector_fmul_scalar(const float *src0, const float *src1)
101 {
102  LOCAL_ALIGNED_16(float, cdst, [LEN]);
103  LOCAL_ALIGNED_16(float, odst, [LEN]);
104  int i;
105 
106  declare_func(void, float *dst, const float *src, float mul, int len);
107 
108  call_ref(cdst, src0, src1[0], LEN);
109  call_new(odst, src0, src1[0], LEN);
110  for (i = 0; i < LEN; i++) {
111  double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
112  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
113  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
114  i, cdst[i], odst[i], cdst[i] - odst[i]);
115  fail();
116  break;
117  }
118  }
119  bench_new(odst, src0, src1[0], LEN);
120 }
121 
122 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
123 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
124 {
125  LOCAL_ALIGNED_16(float, cdst, [LEN]);
126  LOCAL_ALIGNED_16(float, odst, [LEN]);
127  int i;
128 
129  declare_func(void, float *dst, const float *src0, const float *src1,
130  const float *win, int len);
131 
132  call_ref(cdst, src0, src1, win, LEN / 2);
133  call_new(odst, src0, src1, win, LEN / 2);
134  for (i = 0; i < LEN; i++) {
135  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
136  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
137  i, cdst[i], odst[i], cdst[i] - odst[i]);
138  fail();
139  break;
140  }
141  }
142  bench_new(odst, src0, src1, win, LEN / 2);
143 }
144 
145 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
146 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
147 {
148  LOCAL_ALIGNED_32(float, cdst, [LEN]);
149  LOCAL_ALIGNED_32(float, odst, [LEN]);
150  int i;
151 
152  declare_func(void, float *dst, const float *src, float mul, int len);
153 
154  memcpy(cdst, src2, LEN * sizeof(*src2));
155  memcpy(odst, src2, LEN * sizeof(*src2));
156 
157  call_ref(cdst, src0, src1[0], LEN);
158  call_new(odst, src0, src1[0], LEN);
159  for (i = 0; i < LEN; i++) {
160  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
161  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
162  i, cdst[i], odst[i], cdst[i] - odst[i]);
163  fail();
164  break;
165  }
166  }
167  memcpy(odst, src2, LEN * sizeof(*src2));
168  bench_new(odst, src0, src1[0], LEN);
169 }
170 
171 static void test_vector_dmul_scalar(const double *src0, const double *src1)
172 {
173  LOCAL_ALIGNED_32(double, cdst, [LEN]);
174  LOCAL_ALIGNED_32(double, odst, [LEN]);
175  int i;
176 
177  declare_func(void, double *dst, const double *src, double mul, int len);
178 
179  call_ref(cdst, src0, src1[0], LEN);
180  call_new(odst, src0, src1[0], LEN);
181  for (i = 0; i < LEN; i++) {
182  double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
183  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
184  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
185  cdst[i], odst[i], cdst[i] - odst[i]);
186  fail();
187  break;
188  }
189  }
190  bench_new(odst, src0, src1[0], LEN);
191 }
192 
193 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
194 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
195 {
196  LOCAL_ALIGNED_32(double, cdst, [LEN]);
197  LOCAL_ALIGNED_32(double, odst, [LEN]);
198  int i;
199 
200  declare_func(void, double *dst, const double *src, double mul, int len);
201 
202  memcpy(cdst, src2, LEN * sizeof(*src2));
203  memcpy(odst, src2, LEN * sizeof(*src2));
204  call_ref(cdst, src0, src1[0], LEN);
205  call_new(odst, src0, src1[0], LEN);
206  for (i = 0; i < LEN; i++) {
207  if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
208  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
209  i, cdst[i], odst[i], cdst[i] - odst[i]);
210  fail();
211  break;
212  }
213  }
214  memcpy(odst, src2, LEN * sizeof(*src2));
215  bench_new(odst, src0, src1[0], LEN);
216 }
217 
218 static void test_butterflies_float(const float *src0, const float *src1)
219 {
220  LOCAL_ALIGNED_16(float, cdst, [LEN]);
221  LOCAL_ALIGNED_16(float, odst, [LEN]);
222  LOCAL_ALIGNED_16(float, cdst1, [LEN]);
223  LOCAL_ALIGNED_16(float, odst1, [LEN]);
224  int i;
225 
226  declare_func(void, float *restrict src0, float *restrict src1,
227  int len);
228 
229  memcpy(cdst, src0, LEN * sizeof(*src0));
230  memcpy(cdst1, src1, LEN * sizeof(*src1));
231  memcpy(odst, src0, LEN * sizeof(*src0));
232  memcpy(odst1, src1, LEN * sizeof(*src1));
233 
234  call_ref(cdst, cdst1, LEN);
235  call_new(odst, odst1, LEN);
236  for (i = 0; i < LEN; i++) {
237  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
238  !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
239  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
240  i, cdst[i], odst[i], cdst[i] - odst[i]);
241  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
242  i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
243  fail();
244  break;
245  }
246  }
247  memcpy(odst, src0, LEN * sizeof(*src0));
248  memcpy(odst1, src1, LEN * sizeof(*src1));
249  bench_new(odst, odst1, LEN);
250 }
251 
252 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
253 static void test_scalarproduct_float(const float *src0, const float *src1)
254 {
255  float cprod, oprod;
256 
257  declare_func_float(float, const float *src0, const float *src1, int len);
258 
259  cprod = call_ref(src0, src1, LEN);
260  oprod = call_new(src0, src1, LEN);
262  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
263  cprod, oprod, cprod - oprod);
264  fail();
265  }
266  bench_new(src0, src1, LEN);
267 }
268 
269 static void test_scalarproduct_double(const double *src0, const double *src1)
270 {
271  double cprod, oprod;
272 
273  declare_func_float(double, const double *, const double *, size_t);
274 
275  cprod = call_ref(src0, src1, LEN);
276  oprod = call_new(src0, src1, LEN);
278  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
279  cprod, oprod, cprod - oprod);
280  fail();
281  }
282  bench_new(src0, src1, LEN);
283 }
284 
286 {
287  LOCAL_ALIGNED_32(float, src0, [LEN]);
288  LOCAL_ALIGNED_32(float, src1, [LEN]);
289  LOCAL_ALIGNED_32(float, src2, [LEN]);
290  LOCAL_ALIGNED_16(float, src3, [LEN]);
291  LOCAL_ALIGNED_16(float, src4, [LEN]);
292  LOCAL_ALIGNED_16(float, src5, [LEN]);
293  LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
294  LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
295  LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
297 
298  if (!fdsp) {
299  fprintf(stderr, "floatdsp: Out of memory error\n");
300  return;
301  }
302 
303  randomize_stddev(src0, LEN, 10.0);
304  randomize_stddev(src1, LEN, 10.0);
305  randomize_stddev(src2, LEN, 10.0);
306  randomize_stddev(src3, LEN, 10.0);
307  randomize_stddev(src4, LEN, 10.0);
308  randomize_stddev(src5, LEN, 10.0);
309  randomize_stddev_dbl(dbl_src0, LEN, 10.0);
310  randomize_stddev_dbl(dbl_src1, LEN, 10.0);
311  randomize_stddev_dbl(dbl_src2, LEN, 10.0);
312 
313  if (check_func(fdsp->vector_fmul, "vector_fmul"))
315  if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
317  if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
318  test_vector_fmul_scalar(src3, src4);
319  if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
321  if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
322  test_vector_fmul_window(src3, src4, src5);
323  report("vector_fmul");
324  if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
326  report("vector_fmac");
327  if (check_func(fdsp->vector_dmul, "vector_dmul"))
328  test_vector_dmul(dbl_src0, dbl_src1);
329  if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
330  test_vector_dmul_scalar(dbl_src0, dbl_src1);
331  report("vector_dmul");
332  if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
333  test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
334  report("vector_dmac");
335  if (check_func(fdsp->butterflies_float, "butterflies_float"))
336  test_butterflies_float(src3, src4);
337  report("butterflies_float");
338  if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
339  test_scalarproduct_float(src3, src4);
340  report("scalarproduct_float");
341  if (check_func(fdsp->scalarproduct_double, "scalarproduct_double"))
342  test_scalarproduct_double(dbl_src0, dbl_src1);
343  report("scalarproduct_double");
344 
345  av_freep(&fdsp);
346 }
AVFloatDSPContext::butterflies_float
void(* butterflies_float)(float *restrict v1, float *restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: float_dsp.h:164
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:420
AVFloatDSPContext::vector_fmul_reverse
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats, and store the result in a vector of floats...
Definition: float_dsp.h:154
test_scalarproduct_double
static void test_scalarproduct_double(const double *src0, const double *src1)
Definition: float_dsp.c:269
check_func
#define check_func
Definition: test.h:480
float.h
test_vector_dmac_scalar
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
Definition: float_dsp.c:194
AVFloatDSPContext::vector_dmul
void(* vector_dmul)(double *dst, const double *src0, const double *src1, int len)
Calculate the entry wise product of two vectors of doubles and store the result in a vector of double...
Definition: float_dsp.h:190
bench_new
#define bench_new
Definition: test.h:486
win
static float win(SuperEqualizerContext *s, float n, int N)
Definition: af_superequalizer.c:119
call_ref
#define call_ref
Definition: test.h:484
ARBITRARY_FMUL_ADD_CONST
#define ARBITRARY_FMUL_ADD_CONST
Definition: float_dsp.c:77
checkasm.h
checkasm_check_float_dsp
void checkasm_check_float_dsp(void)
Definition: float_dsp.c:285
AVFloatDSPContext::scalarproduct_float
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: float_dsp.h:175
test_butterflies_float
static void test_butterflies_float(const float *src0, const float *src1)
Definition: float_dsp.c:218
test_vector_fmul_scalar
static void test_vector_fmul_scalar(const float *src0, const float *src1)
Definition: float_dsp.c:100
declare_func
#define declare_func
Definition: test.h:488
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
fail
#define fail
Definition: test.h:478
float_near_abs_eps
#define float_near_abs_eps
Definition: utils.h:371
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
test_vector_fmul
static void test_vector_fmul(const float *src0, const float *src1)
Definition: float_dsp.c:31
AVFloatDSPContext::vector_fmul_scalar
void(* vector_fmul_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float.
Definition: float_dsp.h:85
ARBITRARY_FMAC_SCALAR_CONST
#define ARBITRARY_FMAC_SCALAR_CONST
Definition: float_dsp.c:145
AVFloatDSPContext::scalarproduct_double
double(* scalarproduct_double)(const double *v1, const double *v2, size_t len)
Calculate the scalar product of two vectors of doubles.
Definition: float_dsp.h:205
randomize_stddev_dbl
#define randomize_stddev_dbl(buf, size, stddev)
Definition: checkasm.h:146
float_dsp.h
AVFloatDSPContext::vector_fmul
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats.
Definition: float_dsp.h:38
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
ARBITRARY_FMUL_WINDOW_CONST
#define ARBITRARY_FMUL_WINDOW_CONST
Definition: float_dsp.c:122
test_vector_dmul_scalar
static void test_vector_dmul_scalar(const double *src0, const double *src1)
Definition: float_dsp.c:171
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AVFloatDSPContext
Definition: float_dsp.h:24
test_vector_fmul_add
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:78
LEN
#define LEN
Definition: float_dsp.c:29
ARBITRARY_SCALARPRODUCT_CONST
#define ARBITRARY_SCALARPRODUCT_CONST
Definition: float_dsp.c:252
AVFloatDSPContext::vector_fmac_scalar
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
Definition: float_dsp.h:54
declare_func_float
#define declare_func_float
Definition: checkasm.h:141
internal.h
src2
const pixel * src2
Definition: h264pred_template.c:421
AVFloatDSPContext::vector_fmul_add
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Calculate the entry wise product of two vectors of floats, add a third vector of floats and store the...
Definition: float_dsp.h:137
len
int len
Definition: vorbis_enc_data.h:426
call_new
#define call_new
Definition: test.h:485
double_near_abs_eps
#define double_near_abs_eps
Definition: utils.h:376
ARBITRARY_DMAC_SCALAR_CONST
#define ARBITRARY_DMAC_SCALAR_CONST
Definition: float_dsp.c:193
test_vector_fmul_window
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
Definition: float_dsp.c:123
AVFloatDSPContext::vector_dmul_scalar
void(* vector_dmul_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of double by a scalar double.
Definition: float_dsp.h:100
test_scalarproduct_float
static void test_scalarproduct_float(const float *src0, const float *src1)
Definition: float_dsp.c:253
src0
const pixel *const src0
Definition: h264pred_template.c:419
report
#define report
Definition: test.h:479
mem.h
AVFloatDSPContext::vector_fmul_window
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Overlap/add with window function.
Definition: float_dsp.h:119
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
AVFloatDSPContext::vector_dmac_scalar
void(* vector_dmac_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of doubles by a scalar double and add to destination vector.
Definition: float_dsp.h:70
test_vector_dmul
static void test_vector_dmul(const double *src0, const double *src1)
Definition: float_dsp.c:54
src
#define src
Definition: vp8dsp.c:248
test_vector_fmac_scalar
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:146
randomize_stddev
#define randomize_stddev(buf, size, stddev)
Definition: checkasm.h:144