00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "libavcodec/dsputil.h"
00030 #include "asm.h"
00031
00032 extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
00033 int line_size);
00034 extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
00035 int line_size);
00036
00037
00038
00039
/*
 * Fixed-point IDCT weights: Wk is cos(k * pi / 16) * sqrt(2) * (1 << 14),
 * rounded to the nearest integer.  W4 is the exception: the formula gives
 * exactly 16384, but 16383 is used here.
 * NOTE(review): presumably a deliberate rounding tweak -- confirm before
 * "correcting" it.
 */
#define W1 ((int_fast32_t) 22725)
#define W2 ((int_fast32_t) 21407)
#define W3 ((int_fast32_t) 19266)
#define W4 ((int_fast32_t) 16383)
#define W5 ((int_fast32_t) 12873)
#define W6 ((int_fast32_t)  8867)
#define W7 ((int_fast32_t)  4520)

/* Right shifts applied to the results of the row pass and column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20
00049
00050
00051 static inline int idct_row(DCTELEM *row)
00052 {
00053 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
00054 uint64_t l, r, t2;
00055 l = ldq(row);
00056 r = ldq(row + 4);
00057
00058 if (l == 0 && r == 0)
00059 return 0;
00060
00061 a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
00062
00063 if (((l & ~0xffffUL) | r) == 0) {
00064 a0 >>= ROW_SHIFT;
00065 t2 = (uint16_t) a0;
00066 t2 |= t2 << 16;
00067 t2 |= t2 << 32;
00068
00069 stq(t2, row);
00070 stq(t2, row + 4);
00071 return 1;
00072 }
00073
00074 a1 = a0;
00075 a2 = a0;
00076 a3 = a0;
00077
00078 t = extwl(l, 4);
00079 if (t != 0) {
00080 t = sextw(t);
00081 a0 += W2 * t;
00082 a1 += W6 * t;
00083 a2 -= W6 * t;
00084 a3 -= W2 * t;
00085 }
00086
00087 t = extwl(r, 0);
00088 if (t != 0) {
00089 t = sextw(t);
00090 a0 += W4 * t;
00091 a1 -= W4 * t;
00092 a2 -= W4 * t;
00093 a3 += W4 * t;
00094 }
00095
00096 t = extwl(r, 4);
00097 if (t != 0) {
00098 t = sextw(t);
00099 a0 += W6 * t;
00100 a1 -= W2 * t;
00101 a2 += W2 * t;
00102 a3 -= W6 * t;
00103 }
00104
00105 t = extwl(l, 2);
00106 if (t != 0) {
00107 t = sextw(t);
00108 b0 = W1 * t;
00109 b1 = W3 * t;
00110 b2 = W5 * t;
00111 b3 = W7 * t;
00112 } else {
00113 b0 = 0;
00114 b1 = 0;
00115 b2 = 0;
00116 b3 = 0;
00117 }
00118
00119 t = extwl(l, 6);
00120 if (t) {
00121 t = sextw(t);
00122 b0 += W3 * t;
00123 b1 -= W7 * t;
00124 b2 -= W1 * t;
00125 b3 -= W5 * t;
00126 }
00127
00128
00129 t = extwl(r, 2);
00130 if (t) {
00131 t = sextw(t);
00132 b0 += W5 * t;
00133 b1 -= W1 * t;
00134 b2 += W7 * t;
00135 b3 += W3 * t;
00136 }
00137
00138 t = extwl(r, 6);
00139 if (t) {
00140 t = sextw(t);
00141 b0 += W7 * t;
00142 b1 -= W5 * t;
00143 b2 += W3 * t;
00144 b3 -= W1 * t;
00145 }
00146
00147 row[0] = (a0 + b0) >> ROW_SHIFT;
00148 row[1] = (a1 + b1) >> ROW_SHIFT;
00149 row[2] = (a2 + b2) >> ROW_SHIFT;
00150 row[3] = (a3 + b3) >> ROW_SHIFT;
00151 row[4] = (a3 - b3) >> ROW_SHIFT;
00152 row[5] = (a2 - b2) >> ROW_SHIFT;
00153 row[6] = (a1 - b1) >> ROW_SHIFT;
00154 row[7] = (a0 - b0) >> ROW_SHIFT;
00155
00156 return 2;
00157 }
00158
00159 static inline void idct_col(DCTELEM *col)
00160 {
00161 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
00162
00163 col[0] += (1 << (COL_SHIFT - 1)) / W4;
00164
00165 a0 = W4 * col[8 * 0];
00166 a1 = W4 * col[8 * 0];
00167 a2 = W4 * col[8 * 0];
00168 a3 = W4 * col[8 * 0];
00169
00170 if (col[8 * 2]) {
00171 a0 += W2 * col[8 * 2];
00172 a1 += W6 * col[8 * 2];
00173 a2 -= W6 * col[8 * 2];
00174 a3 -= W2 * col[8 * 2];
00175 }
00176
00177 if (col[8 * 4]) {
00178 a0 += W4 * col[8 * 4];
00179 a1 -= W4 * col[8 * 4];
00180 a2 -= W4 * col[8 * 4];
00181 a3 += W4 * col[8 * 4];
00182 }
00183
00184 if (col[8 * 6]) {
00185 a0 += W6 * col[8 * 6];
00186 a1 -= W2 * col[8 * 6];
00187 a2 += W2 * col[8 * 6];
00188 a3 -= W6 * col[8 * 6];
00189 }
00190
00191 if (col[8 * 1]) {
00192 b0 = W1 * col[8 * 1];
00193 b1 = W3 * col[8 * 1];
00194 b2 = W5 * col[8 * 1];
00195 b3 = W7 * col[8 * 1];
00196 } else {
00197 b0 = 0;
00198 b1 = 0;
00199 b2 = 0;
00200 b3 = 0;
00201 }
00202
00203 if (col[8 * 3]) {
00204 b0 += W3 * col[8 * 3];
00205 b1 -= W7 * col[8 * 3];
00206 b2 -= W1 * col[8 * 3];
00207 b3 -= W5 * col[8 * 3];
00208 }
00209
00210 if (col[8 * 5]) {
00211 b0 += W5 * col[8 * 5];
00212 b1 -= W1 * col[8 * 5];
00213 b2 += W7 * col[8 * 5];
00214 b3 += W3 * col[8 * 5];
00215 }
00216
00217 if (col[8 * 7]) {
00218 b0 += W7 * col[8 * 7];
00219 b1 -= W5 * col[8 * 7];
00220 b2 += W3 * col[8 * 7];
00221 b3 -= W1 * col[8 * 7];
00222 }
00223
00224 col[8 * 0] = (a0 + b0) >> COL_SHIFT;
00225 col[8 * 7] = (a0 - b0) >> COL_SHIFT;
00226 col[8 * 1] = (a1 + b1) >> COL_SHIFT;
00227 col[8 * 6] = (a1 - b1) >> COL_SHIFT;
00228 col[8 * 2] = (a2 + b2) >> COL_SHIFT;
00229 col[8 * 5] = (a2 - b2) >> COL_SHIFT;
00230 col[8 * 3] = (a3 + b3) >> COL_SHIFT;
00231 col[8 * 4] = (a3 - b3) >> COL_SHIFT;
00232 }
00233
00234
00235
00236 static inline void idct_col2(DCTELEM *col)
00237 {
00238 int i;
00239 uint64_t l, r;
00240
00241 for (i = 0; i < 8; ++i) {
00242 int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
00243
00244 a0 *= W4;
00245 col[i] = a0 >> COL_SHIFT;
00246 }
00247
00248 l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
00249 stq(l, col + 2 * 4); stq(r, col + 3 * 4);
00250 stq(l, col + 4 * 4); stq(r, col + 5 * 4);
00251 stq(l, col + 6 * 4); stq(r, col + 7 * 4);
00252 stq(l, col + 8 * 4); stq(r, col + 9 * 4);
00253 stq(l, col + 10 * 4); stq(r, col + 11 * 4);
00254 stq(l, col + 12 * 4); stq(r, col + 13 * 4);
00255 stq(l, col + 14 * 4); stq(r, col + 15 * 4);
00256 }
00257
00258 void ff_simple_idct_axp(DCTELEM *block)
00259 {
00260
00261 int i;
00262 int rowsZero = 1;
00263 int rowsConstant = 1;
00264
00265 for (i = 0; i < 8; i++) {
00266 int sparseness = idct_row(block + 8 * i);
00267
00268 if (i > 0 && sparseness > 0)
00269 rowsZero = 0;
00270 if (sparseness == 2)
00271 rowsConstant = 0;
00272 }
00273
00274 if (rowsZero) {
00275 idct_col2(block);
00276 } else if (rowsConstant) {
00277 idct_col(block);
00278 for (i = 0; i < 8; i += 2) {
00279 uint64_t v = (uint16_t) block[0];
00280 uint64_t w = (uint16_t) block[8];
00281
00282 v |= v << 16;
00283 w |= w << 16;
00284 v |= v << 32;
00285 w |= w << 32;
00286 stq(v, block + 0 * 4);
00287 stq(v, block + 1 * 4);
00288 stq(w, block + 2 * 4);
00289 stq(w, block + 3 * 4);
00290 block += 4 * 4;
00291 }
00292 } else {
00293 for (i = 0; i < 8; i++)
00294 idct_col(block + i);
00295 }
00296 }
00297
00298 void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
00299 {
00300 ff_simple_idct_axp(block);
00301 put_pixels_clamped_axp_p(block, dest, line_size);
00302 }
00303
00304 void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
00305 {
00306 ff_simple_idct_axp(block);
00307 add_pixels_clamped_axp_p(block, dest, line_size);
00308 }