00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00071 #include <stdlib.h>
00072 #include <stdio.h>
00073 #include "libavutil/common.h"
00074 #include "dsputil.h"
00075
/* Edge length of a block: the code below indexes rows and columns 0..7. */
#define DCTSIZE 8
/* Wrapper used on exported function return types; expands to its argument. */
#define GLOBAL(x) x
/*
 * Plain right shift.  Note that in C the result of shifting a negative
 * value right is implementation-defined.
 */
#define RIGHT_SHIFT(x, n) ((x) >> (n))
/* Hook placed after local declarations; expands to nothing in this file. */
#define SHIFT_TEMPS

/*
 * Stop the build with an explicit diagnostic if the block size is changed,
 * instead of relying on stray text producing a syntax error.
 */
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
/*
 * Number of fractional bits carried by the FIX_* multipliers; MULTIPLY()
 * shifts products right by this amount.
 */
#define CONST_BITS 8

/*
 * Fixed-point multipliers, named after the real value they approximate.
 * For CONST_BITS == 8 they are spelled out as literal integers
 * (value * 256, rounded to nearest).
 */
#if CONST_BITS == 8
#define FIX_0_382683433 ((int32_t)  98)   /* 0.382683433 * 256 */
#define FIX_0_541196100 ((int32_t) 139)   /* 0.541196100 * 256 */
#define FIX_0_707106781 ((int32_t) 181)   /* 0.707106781 * 256 */
#define FIX_1_306562965 ((int32_t) 334)   /* 1.306562965 * 256 */
#else
/*
 * FIX() is not defined anywhere in this file.  Provide a fallback so this
 * branch still builds when no included header supplies one: scale by
 * 2^CONST_BITS and round to nearest (all constants here are positive).
 */
#ifndef FIX
#define FIX(x) ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433 FIX(0.382683433)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_707106781 FIX(0.707106781)
#define FIX_1_306562965 FIX(1.306562965)
#endif
00129
00130
00131
00132
00133
00134
00135
/*
 * Unless USE_ACCURATE_ROUNDING is defined, any previous DESCALE is dropped
 * and replaced by a bare right shift (no rounding term is added).
 */
#if !defined(USE_ACCURATE_ROUNDING)
#  undef DESCALE
#  define DESCALE(value, bits) RIGHT_SHIFT(value, bits)
#endif






/*
 * Multiply a variable by a fixed-point coefficient, descale the product by
 * CONST_BITS and narrow the result to DCTELEM.
 */
#define MULTIPLY(var, coef) ((DCTELEM) DESCALE((var) * (coef), CONST_BITS))
00147
00148 static av_always_inline void row_fdct(DCTELEM * data){
00149 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00150 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00151 int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00152 DCTELEM *dataptr;
00153 int ctr;
00154 SHIFT_TEMPS
00155
00156
00157
00158 dataptr = data;
00159 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00160 tmp0 = dataptr[0] + dataptr[7];
00161 tmp7 = dataptr[0] - dataptr[7];
00162 tmp1 = dataptr[1] + dataptr[6];
00163 tmp6 = dataptr[1] - dataptr[6];
00164 tmp2 = dataptr[2] + dataptr[5];
00165 tmp5 = dataptr[2] - dataptr[5];
00166 tmp3 = dataptr[3] + dataptr[4];
00167 tmp4 = dataptr[3] - dataptr[4];
00168
00169
00170
00171 tmp10 = tmp0 + tmp3;
00172 tmp13 = tmp0 - tmp3;
00173 tmp11 = tmp1 + tmp2;
00174 tmp12 = tmp1 - tmp2;
00175
00176 dataptr[0] = tmp10 + tmp11;
00177 dataptr[4] = tmp10 - tmp11;
00178
00179 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00180 dataptr[2] = tmp13 + z1;
00181 dataptr[6] = tmp13 - z1;
00182
00183
00184
00185 tmp10 = tmp4 + tmp5;
00186 tmp11 = tmp5 + tmp6;
00187 tmp12 = tmp6 + tmp7;
00188
00189
00190 z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00191 z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;
00192 z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;
00193 z3 = MULTIPLY(tmp11, FIX_0_707106781);
00194
00195 z11 = tmp7 + z3;
00196 z13 = tmp7 - z3;
00197
00198 dataptr[5] = z13 + z2;
00199 dataptr[3] = z13 - z2;
00200 dataptr[1] = z11 + z4;
00201 dataptr[7] = z11 - z4;
00202
00203 dataptr += DCTSIZE;
00204 }
00205 }
00206
00207
00208
00209
00210
00211 GLOBAL(void)
00212 fdct_ifast (DCTELEM * data)
00213 {
00214 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00215 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00216 int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00217 DCTELEM *dataptr;
00218 int ctr;
00219 SHIFT_TEMPS
00220
00221 row_fdct(data);
00222
00223
00224
00225 dataptr = data;
00226 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00227 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00228 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00229 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00230 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00231 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00232 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00233 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00234 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00235
00236
00237
00238 tmp10 = tmp0 + tmp3;
00239 tmp13 = tmp0 - tmp3;
00240 tmp11 = tmp1 + tmp2;
00241 tmp12 = tmp1 - tmp2;
00242
00243 dataptr[DCTSIZE*0] = tmp10 + tmp11;
00244 dataptr[DCTSIZE*4] = tmp10 - tmp11;
00245
00246 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00247 dataptr[DCTSIZE*2] = tmp13 + z1;
00248 dataptr[DCTSIZE*6] = tmp13 - z1;
00249
00250
00251
00252 tmp10 = tmp4 + tmp5;
00253 tmp11 = tmp5 + tmp6;
00254 tmp12 = tmp6 + tmp7;
00255
00256
00257 z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00258 z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;
00259 z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;
00260 z3 = MULTIPLY(tmp11, FIX_0_707106781);
00261
00262 z11 = tmp7 + z3;
00263 z13 = tmp7 - z3;
00264
00265 dataptr[DCTSIZE*5] = z13 + z2;
00266 dataptr[DCTSIZE*3] = z13 - z2;
00267 dataptr[DCTSIZE*1] = z11 + z4;
00268 dataptr[DCTSIZE*7] = z11 - z4;
00269
00270 dataptr++;
00271 }
00272 }
00273
00274
00275
00276
00277
00278 GLOBAL(void)
00279 fdct_ifast248 (DCTELEM * data)
00280 {
00281 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00282 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00283 int_fast16_t z1;
00284 DCTELEM *dataptr;
00285 int ctr;
00286 SHIFT_TEMPS
00287
00288 row_fdct(data);
00289
00290
00291
00292 dataptr = data;
00293 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00294 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
00295 tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00296 tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
00297 tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
00298 tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
00299 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00300 tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
00301 tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
00302
00303
00304
00305 tmp10 = tmp0 + tmp3;
00306 tmp11 = tmp1 + tmp2;
00307 tmp12 = tmp1 - tmp2;
00308 tmp13 = tmp0 - tmp3;
00309
00310 dataptr[DCTSIZE*0] = tmp10 + tmp11;
00311 dataptr[DCTSIZE*4] = tmp10 - tmp11;
00312
00313 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00314 dataptr[DCTSIZE*2] = tmp13 + z1;
00315 dataptr[DCTSIZE*6] = tmp13 - z1;
00316
00317 tmp10 = tmp4 + tmp7;
00318 tmp11 = tmp5 + tmp6;
00319 tmp12 = tmp5 - tmp6;
00320 tmp13 = tmp4 - tmp7;
00321
00322 dataptr[DCTSIZE*1] = tmp10 + tmp11;
00323 dataptr[DCTSIZE*5] = tmp10 - tmp11;
00324
00325 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00326 dataptr[DCTSIZE*3] = tmp13 + z1;
00327 dataptr[DCTSIZE*7] = tmp13 - z1;
00328
00329 dataptr++;
00330 }
00331 }
00332
00333
/*
 * Remove this file's private macros so they do not remain defined for
 * anything that follows in the same translation unit.  All four FIX_*
 * constants are dropped, and MULTIPLY as well, since it expands to DESCALE
 * and CONST_BITS, which are removed here too.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef MULTIPLY
#undef FIX_0_382683433
#undef FIX_0_541196100
#undef FIX_0_707106781
#undef FIX_1_306562965