FFmpeg
aacpsdsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 #include "libavutil/mem_internal.h"
22 
23 #include "checkasm.h"
24 
/* Passed as the n argument of hybrid_analysis; also the first dimension of
 * the filter table used by that test. */
25 #define N 32
/* Passed as the stride argument of hybrid_analysis. */
26 #define STRIDE 128
/* Element count used for the scratch buffers of the tests: N * STRIDE. */
27 #define BUF_SIZE (N * STRIDE)
28 
/*
 * Fill the first len elements of buf with pseudo-random values.
 *
 * Every element receives rnd() converted to INTFLOAT and divided by
 * UINT_MAX. Both arguments are parenthesised in the expansion and the loop
 * counter carries a macro-private name, so expressions such as
 * randomize(p + i, a & b) behave as written even when the caller has its
 * own variable called i.
 */
#define randomize(buf, len) do {                                        \
    int randomize_idx_;                                                 \
    for (randomize_idx_ = 0; randomize_idx_ < (len); randomize_idx_++)  \
        (buf)[randomize_idx_] = (INTFLOAT)rnd() / UINT_MAX;             \
} while (0)
36 
/* Absolute tolerance handed to float_near_abs_eps_array() when the output of
 * the reference and of the tested function are compared. */
37 #define EPS 0.005
38 
39 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
40 {
41  int i;
42  for (i = 0; i < len; i++) {
43  union av_intfloat32 u = { .f = buf[i] };
44  u.i &= (0xffffffff << bits);
45  buf[i] = u.f;
46  }
47 }
48 
/*
 * Compare the add_squares function under test with the reference version.
 *
 * NOTE(review): listing lines 51-53 are absent from this copy, so the
 * declarations of dst0, dst1 and src are not visible here; confirm them
 * against the upstream file.
 */
49 static void test_add_squares(void)
50 {
54 
    /* Signature shared by the reference and the tested function. */
55  declare_func(void, INTFLOAT *dst,
56  const INTFLOAT (*src)[2], int n);
57 
    /* Random input; both destinations start from identical random contents. */
58  randomize((INTFLOAT *)src, BUF_SIZE * 2);
59  randomize(dst0, BUF_SIZE);
60  memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61  call_ref(dst0, src, BUF_SIZE);
62  call_new(dst1, src, BUF_SIZE);
    /* The two results only have to agree within EPS, not bit for bit. */
63  if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64  fail();
65  bench_new(dst1, src, BUF_SIZE);
66 }
67 
/*
 * Compare the mul_pair_single function under test with the reference version.
 *
 * NOTE(review): listing lines 72-73 and 78 are absent from this copy. The
 * declarations of src0 and src1 are therefore not visible, and src0 is
 * presumably randomized on the missing line 78; confirm against upstream.
 */
68 static void test_mul_pair_single(void)
69 {
    /* One output buffer per implementation, BUF_SIZE pairs each. */
70  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
74 
    /* Signature shared by the reference and the tested function. */
75  declare_func(void, INTFLOAT (*dst)[2],
76  INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77 
79  randomize(src1, BUF_SIZE);
80  call_ref(dst0, src0, src1, BUF_SIZE);
81  call_new(dst1, src0, src1, BUF_SIZE);
    /* Outputs are compared as flat float arrays of BUF_SIZE * 2 values. */
82  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83  fail();
84  bench_new(dst1, src0, src1, BUF_SIZE);
85 }
86 
/*
 * Compare the hybrid_analysis function under test with the reference version.
 *
 * Input, filter table and both output buffers are filled with random data;
 * the outputs must then agree within EPS.
 */
87 static void test_hybrid_analysis(void)
88 {
    /* One output buffer per implementation, BUF_SIZE pairs each. */
89  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
    /* 13 input pairs and N filter rows of 8 pairs. */
91  LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92  LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93 
    /* Signature shared by the reference and the tested function. */
94  declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95  const INTFLOAT (*filter)[8][2],
96  ptrdiff_t stride, int n);
97 
98  randomize((INTFLOAT *)in, 13 * 2);
99  randomize((INTFLOAT *)filter, N * 8 * 2);
100 
    /* Start both outputs from the same random contents so that elements
     * neither implementation writes still compare equal. */
101  randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102  memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103 
104  call_ref(dst0, in, filter, STRIDE, N);
105  call_new(dst1, in, filter, STRIDE, N);
106 
    /* Outputs are compared as flat float arrays of BUF_SIZE * 2 values. */
107  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108  fail();
109  bench_new(dst1, in, filter, STRIDE, N);
110 }
111 
113 {
    /*
     * Body of the hybrid_analysis_ileave comparison test.
     *
     * NOTE(review): the function header (listing line 112) is absent from
     * this copy; the cross-reference index of the listing gives it as
     * `static void test_hybrid_analysis_ileave(void)`.
     */
114  LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
    /* One output buffer per implementation. */
115  LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116  LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117 
    /* Signature shared by the reference and the tested function. */
118  declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119  int i, int len);
120 
    /* Both outputs start from the same random contents. */
121  randomize((INTFLOAT *)out0, 91 * 32 * 2);
122  randomize((INTFLOAT *)in, 2 * 38 * 64);
123  memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124 
125  /* len is hardcoded to 32 as that's the only value used in
126  libavcodec. asm functions are likely to be optimized
127  hardcoding this value in their loops and could fail with
128  anything else.
129  i is hardcoded to the two values currently used by the
130  aac decoder because the arm neon implementation is
131  micro-optimized for them and will fail for almost every
132  other value. */
133  call_ref(out0, in, 3, 32);
134  call_new(out1, in, 3, 32);
135 
136  /* the function just moves data around, so memcmp is enough */
137  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138  fail();
139 
    /* Second round with i = 5; the outputs are not re-randomized, so it
     * runs on top of what the i = 3 round left behind. */
140  call_ref(out0, in, 5, 32);
141  call_new(out1, in, 5, 32);
142 
143  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144  fail();
145 
    /* Only the i = 3 case is benchmarked. */
146  bench_new(out1, in, 3, 32);
147 }
148 
150 {
    /*
     * Body of the hybrid_synthesis_deint comparison test.
     *
     * NOTE(review): the function header (listing line 149) is absent from
     * this copy; the cross-reference index of the listing gives it as
     * `static void test_hybrid_synthesis_deint(void)`.
     */
    /* One output buffer per implementation. */
151  LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152  LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153  LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
154 
    /* Signature shared by the reference and the tested function. */
155  declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156  int i, int len);
157 
    /* Both outputs start from the same random contents. */
158  randomize((INTFLOAT *)in, 91 * 32 * 2);
159  randomize((INTFLOAT *)out0, 2 * 38 * 64);
160  memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161 
162  /* len is hardcoded to 32 as that's the only value used in
163  libavcodec. asm functions are likely to be optimized
164  hardcoding this value in their loops and could fail with
165  anything else.
166  i is hardcoded to the two values currently used by the
167  aac decoder because the arm neon implementation is
168  micro-optimized for them and will fail for almost every
169  other value. */
170  call_ref(out0, in, 3, 32);
171  call_new(out1, in, 3, 32);
172 
173  /* the function just moves data around, so memcmp is enough */
174  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175  fail();
176 
    /* Second round with i = 5; the outputs are not re-randomized, so it
     * runs on top of what the i = 3 round left behind. */
177  call_ref(out0, in, 5, 32);
178  call_new(out1, in, 5, 32);
179 
180  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181  fail();
182 
    /* Only the i = 3 case is benchmarked. */
183  bench_new(out1, in, 3, 32);
184 }
185 
187 {
    /*
     * Body of the stereo_interpolate comparison test; both entries of
     * psdsp->stereo_interpolate[] are exercised.
     *
     * NOTE(review): the function header (listing line 186) and listing line
     * 190 are absent from this copy. The cross-reference index gives the
     * header as `static void test_stereo_interpolate(PSDSPContext *psdsp)`;
     * line 190 presumably declares r. Confirm against upstream.
     */
188  int i;
    /* l and r hold the pristine random input; the *0 buffers are given to
     * the reference and the *1 buffers to the tested function. */
189  LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
191  LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
192  LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
193  LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
194  LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
195  LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
196  LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197 
    /* Signature shared by the reference and the tested function. */
198  declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
199  INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200 
201  randomize((INTFLOAT *)l, BUF_SIZE * 2);
202  randomize((INTFLOAT *)r, BUF_SIZE * 2);
203 
    /* i selects the function pointer: index 1 is reported with the
     * "_ipdopd" name suffix, index 0 without a suffix. */
204  for (i = 0; i < 2; i++) {
205  if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
    /* l and r are passed without const, so each implementation gets its
     * own copy of the input. */
206  memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
208  memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210 
211  randomize((INTFLOAT *)h, 2 * 4);
212  randomize((INTFLOAT *)h_step, 2 * 4);
213  // Clear the least significant 14 bits of h_step, to avoid
214  // divergence when accumulating h_step BUF_SIZE times into
215  // a float variable which may or may not have extra intermediate
216  // precision. Therefore clear roughly log2(BUF_SIZE) less
217  // significant bits, to get the same result regardless of any
218  // extra precision in the accumulator.
219  clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220 
221  call_ref(l0, r0, h, h_step, BUF_SIZE);
222  call_new(l1, r1, h, h_step, BUF_SIZE);
    /* Both channels must agree within EPS. */
223  if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
224  !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
225  fail();
226 
    /* Restore the pristine input before benchmarking. */
227  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
228  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
229  bench_new(l1, r1, h, h_step, BUF_SIZE);
230  }
231  }
232 }
233 
235 {
    /*
     * Body of the test entry point: initialise a PSDSPContext and go
     * through each of its function pointers in turn.
     *
     * NOTE(review): the function header (listing line 234) is absent from
     * this copy; the cross-reference index gives it as
     * `void checkasm_check_aacpsdsp(void)`. Listing lines 241, 245, 249,
     * 253 and 257 are absent as well. They presumably hold the calls to
     * the matching test_*() functions as the bodies of the if statements
     * below, which would leave every report() call unconditional; as the
     * text stands here each report() reads as the body of the if above
     * it. Confirm against upstream.
     */
236  PSDSPContext psdsp;
237 
    /* Fills psdsp with the function pointers to be tested. */
238  ff_psdsp_init(&psdsp);
239 
240  if (check_func(psdsp.add_squares, "ps_add_squares"))
242  report("add_squares");
243 
244  if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
246  report("mul_pair_single");
247 
248  if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
250  report("hybrid_analysis");
251 
252  if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
254  report("hybrid_analysis_ileave");
255 
256  if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
258  report("hybrid_synthesis_deint");
259 
    /* stereo_interpolate does its own check_func() calls, one per entry
     * of the function pointer array, so the context is passed in. */
260  test_stereo_interpolate(&psdsp);
261  report("stereo_interpolate");
262 }
void AAC_RENAME() ff_psdsp_init(PSDSPContext *s)
void(* mul_pair_single)(INTFLOAT(*dst)[2], INTFLOAT(*src0)[2], INTFLOAT *src1, int n)
Definition: aacpsdsp.h:34
int float_near_abs_eps_array(const float *a, const float *b, float eps, unsigned len)
Definition: checkasm.c:328
#define EPS
Definition: aacpsdsp.c:37
#define N
Definition: aacpsdsp.c:25
void(* hybrid_synthesis_deint)(INTFLOAT out[2][38][64], INTFLOAT(*in)[32][2], int i, int len)
Definition: aacpsdsp.h:41
#define report
Definition: checkasm.h:126
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
void checkasm_check_aacpsdsp(void)
Definition: aacpsdsp.c:234
float INTFLOAT
Definition: aac_defines.h:88
static void test_hybrid_synthesis_deint(void)
Definition: aacpsdsp.c:149
void(* add_squares)(INTFLOAT *dst, const INTFLOAT(*src)[2], int n)
Definition: aacpsdsp.h:33
#define src
Definition: vp8dsp.c:255
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link. The additional frames can be left buffered in the filter.
#define declare_func(ret,...)
Definition: checkasm.h:118
const char * r
Definition: vf_curves.c:114
void(* hybrid_analysis)(INTFLOAT(*out)[2], INTFLOAT(*in)[2], const INTFLOAT(*filter)[8][2], ptrdiff_t stride, int n)
Definition: aacpsdsp.h:36
uint8_t bits
Definition: vp3data.h:141
#define STRIDE
Definition: aacpsdsp.c:26
#define fail()
Definition: checkasm.h:123
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
Definition: aacpsdsp.c:39
void(* stereo_interpolate[2])(INTFLOAT(*l)[2], INTFLOAT(*r)[2], INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len)
Definition: aacpsdsp.h:49
uint32_t i
Definition: intfloat.h:28
#define L(x)
Definition: vp56_arith.h:36
#define call_ref(...)
Definition: checkasm.h:129
static void test_hybrid_analysis_ileave(void)
Definition: aacpsdsp.c:112
static void test_add_squares(void)
Definition: aacpsdsp.c:49
static void test_mul_pair_single(void)
Definition: aacpsdsp.c:68
#define src1
Definition: h264pred.c:140
#define randomize(buf, len)
Definition: aacpsdsp.c:29
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define check_func(func,...)
Definition: checkasm.h:114
#define BUF_SIZE
Definition: aacpsdsp.c:27
#define src0
Definition: h264pred.c:139
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
void(* hybrid_analysis_ileave)(INTFLOAT(*out)[32][2], INTFLOAT L[2][38][64], int i, int len)
Definition: aacpsdsp.h:39
static void test_stereo_interpolate(PSDSPContext *psdsp)
Definition: aacpsdsp.c:186
int len
#define bench_new(...)
Definition: checkasm.h:261
static void test_hybrid_analysis(void)
Definition: aacpsdsp.c:87
FILE * out
Definition: movenc.c:54
#define call_new(...)
Definition: checkasm.h:201
int i
Definition: input.c:407