FFmpeg
fft_init.c
Go to the documentation of this file.
1 /*
2  * FFT/IFFT transforms
3  * AltiVec-enabled
4  * Copyright (c) 2009 Loren Merritt
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/ppc/cpu.h"
27 #include "libavcodec/fft.h"
28 
29 /**
30  * Do a complex FFT with the parameters defined in ff_fft_init().
31  * The input data must be permuted before with s->revtab table.
32  * No 1.0 / sqrt(n) normalization is done.
33  * AltiVec-enabled:
34  * This code assumes that the 'z' pointer is 16 bytes-aligned.
35  * It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
36  */
37 
38 #if HAVE_VSX
39 #include "fft_vsx.h"
40 #else
43 #endif
44 
45 #if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
46 static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
47 {
48  int j, k;
49  int n = 1 << s->mdct_bits;
50  int n4 = n >> 2;
51  int n8 = n >> 3;
52  int n32 = n >> 5;
53  const uint16_t *revtabj = s->revtab;
54  const uint16_t *revtabk = s->revtab+n4;
55  const vec_f *tcos = (const vec_f*)(s->tcos+n8);
56  const vec_f *tsin = (const vec_f*)(s->tsin+n8);
57  const vec_f *pin = (const vec_f*)(input+n4);
58  vec_f *pout = (vec_f*)(output+n4);
59 
60  /* pre rotation */
61  k = n32-1;
62  do {
63  vec_f cos,sin,cos0,sin0,cos1,sin1,re,im,r0,i0,r1,i1,a,b,c,d;
64 #define CMULA(p,o0,o1,o2,o3)\
65  a = pin[ k*2+p]; /* { z[k].re, z[k].im, z[k+1].re, z[k+1].im } */\
66  b = pin[-k*2-p-1]; /* { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } */\
67  re = vec_perm(a, b, vcprm(0,2,s0,s2)); /* { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } */\
68  im = vec_perm(a, b, vcprm(s3,s1,3,1)); /* { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } */\
69  cos = vec_perm(cos0, cos1, vcprm(o0,o1,s##o2,s##o3)); /* { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } */\
70  sin = vec_perm(sin0, sin1, vcprm(o0,o1,s##o2,s##o3));\
71  r##p = im*cos - re*sin;\
72  i##p = re*cos + im*sin;
73 #define STORE2(v,dst)\
74  j = dst;\
75  vec_ste(v, 0, output+j*2);\
76  vec_ste(v, 4, output+j*2);
77 #define STORE8(p)\
78  a = vec_perm(r##p, i##p, vcprm(0,s0,0,s0));\
79  b = vec_perm(r##p, i##p, vcprm(1,s1,1,s1));\
80  c = vec_perm(r##p, i##p, vcprm(2,s2,2,s2));\
81  d = vec_perm(r##p, i##p, vcprm(3,s3,3,s3));\
82  STORE2(a, revtabk[ p*2-4]);\
83  STORE2(b, revtabk[ p*2-3]);\
84  STORE2(c, revtabj[-p*2+2]);\
85  STORE2(d, revtabj[-p*2+3]);
86 
87  cos0 = tcos[k];
88  sin0 = tsin[k];
89  cos1 = tcos[-k-1];
90  sin1 = tsin[-k-1];
91  CMULA(0, 0,1,2,3);
92  CMULA(1, 2,3,0,1);
93  STORE8(0);
94  STORE8(1);
95  revtabj += 4;
96  revtabk -= 4;
97  k--;
98  } while(k >= 0);
99 
100 #if HAVE_VSX
101  ff_fft_calc_vsx(s, (FFTComplex*)output);
102 #else
103  ff_fft_calc_altivec(s, (FFTComplex*)output);
104 #endif
105 
106  /* post rotation + reordering */
107  j = -n32;
108  k = n32-1;
109  do {
110  vec_f cos,sin,re,im,a,b,c,d;
111 #define CMULB(d0,d1,o)\
112  re = pout[o*2];\
113  im = pout[o*2+1];\
114  cos = tcos[o];\
115  sin = tsin[o];\
116  d0 = im*sin - re*cos;\
117  d1 = re*sin + im*cos;
118 
119  CMULB(a,b,j);
120  CMULB(c,d,k);
121  pout[2*j] = vec_perm(a, d, vcprm(0,s3,1,s2));
122  pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
123  pout[2*k] = vec_perm(c, b, vcprm(0,s3,1,s2));
124  pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
125  j++;
126  k--;
127  } while(k >= 0);
128 }
129 
130 static void imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
131 {
132  int k;
133  int n = 1 << s->mdct_bits;
134  int n4 = n >> 2;
135  int n16 = n >> 4;
136  vec_u32 sign = {1U<<31,1U<<31,1U<<31,1U<<31};
137  vec_u32 *p0 = (vec_u32*)(output+n4);
138  vec_u32 *p1 = (vec_u32*)(output+n4*3);
139 
140  imdct_half_altivec(s, output + n4, input);
141 
142  for (k = 0; k < n16; k++) {
143  vec_u32 a = p0[k] ^ sign;
144  vec_u32 b = p1[-k-1];
145  p0[-k-1] = vec_perm(a, a, vcprm(3,2,1,0));
146  p1[k] = vec_perm(b, b, vcprm(3,2,1,0));
147  }
148 }
149 #endif /* HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX) */
150 
152 {
153 #if HAVE_GNU_AS && HAVE_ALTIVEC && (HAVE_BIGENDIAN || HAVE_VSX)
155  return;
156 
157 #if HAVE_VSX
158  s->fft_calc = ff_fft_calc_interleave_vsx;
159 #else
161 #endif
162  if (s->mdct_bits >= 5) {
163  s->imdct_calc = imdct_calc_altivec;
164  s->imdct_half = imdct_half_altivec;
165  }
166 #endif /* HAVE_GNU_AS && HAVE_ALTIVEC && HAVE_BIGENDIAN */
167 }
#define vec_f
Definition: util_altivec.h:40
float re
Definition: fft.c:82
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
av_cold void ff_fft_init_ppc(FFTContext *s)
Definition: fft_init.c:151
#define av_cold
Definition: attributes.h:82
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
#define U(x)
Definition: vp56_arith.h:37
#define s2
Definition: regdef.h:39
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
#define s0
Definition: regdef.h:37
void(* imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:107
float FFTSample
Definition: avfft.h:35
void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z)
#define b
Definition: input.c:41
Definition: fft.h:88
FFTSample * tsin
Definition: fft.h:97
#define s(width, name)
Definition: cbs_vp9.c:257
int n
Definition: avisynth_c.h:760
#define s3
Definition: regdef.h:40
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
#define vec_u32
Definition: util_altivec.h:38
int mdct_bits
Definition: fft.h:94
float im
Definition: fft.c:82
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
#define s1
Definition: regdef.h:38
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
void(* imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input)
Definition: fft.h:108
Contains misc utility macros and inline functions.
void(* fft_calc)(struct FFTContext *s, FFTComplex *z)
Do a complex FFT with the parameters defined in ff_fft_init().
Definition: fft.h:106
FFTSample * tcos
Definition: fft.h:96
uint16_t * revtab
Definition: fft.h:91