00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "libavutil/common.h"
00029 #include "libavcodec/dsputil.h"
00030 #include "mmi.h"
00031
/* Precision setting of the transform and the two right shifts derived from it. */
00032 #define BITS_INV_ACC 5 // 4 or 5 for IEEE
00033 #define SHIFT_INV_ROW (16 - BITS_INV_ACC) /* 11 with the setting above; used by psraw in DCT_8_INV_ROW1 */
00034 #define SHIFT_INV_COL (1 + BITS_INV_ACC) /* 6 with the setting above; used by psrah in the STORE/PMS macros */
00035
/*
 * 16-bit multipliers replicated into consttable for the column pass.
 * Numerically they equal tan(pi/16), tan(2*pi/16), tan(3*pi/16) and
 * cos(pi/4), each scaled by 2^15 and rounded.
 */
00036 #define TG1 6518
00037 #define TG2 13573
00038 #define TG3 21895
00039 #define CS4 23170
00040
/*
 * Everything below is an offset from the start of consttable, passed to lq
 * together with $24 (which the functions load with &consttable[0]). The
 * values line up with the table layout when a short is 2 bytes, i.e. they
 * are byte offsets and every 8-entry table line is 16 bytes.
 */
00041 #define ROUNDER_0 0 /* first table line */
00042 #define ROUNDER_1 16 /* second table line */
00043
/* Four 64-byte coefficient tables for the row pass, starting after the two rounder lines. */
00044 #define TAB_i_04 (32+0)
00045 #define TAB_i_17 (32+64)
00046 #define TAB_i_26 (32+128)
00047 #define TAB_i_35 (32+192)
00048
/* The four table lines holding TG1, TG2, TG3 and CS4 respectively. */
00049 #define TG_1_16 (32+256+0)
00050 #define TG_2_16 (32+256+16)
00051 #define TG_3_16 (32+256+32)
00052 #define COS_4_16 (32+256+48)
00053
00054 #define CLIPMAX (32+256+64+0) /* last table line (all 255), the upper clamp used by PUT/ADD */
00055
/*
 * Constant pool for the IDCT. It is addressed through $24 using the offsets
 * defined above (ROUNDER_*, TAB_i_*, TG_*_16, COS_4_16, CLIPMAX), so the
 * order and size of the lines below must not change.
 * align16 is not defined in this file; presumably it is a 16-byte alignment
 * attribute from mmi.h, as the quadword loads would need -- confirm.
 */
00056 static short consttable[] align16 = {
00057 /* ROUNDER_0: rounding term passed as rnd for the row at block offset 0 */
00058 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
00059 /* ROUNDER_1: rounding term passed as rnd for every other row */
00060 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
00061 /* TAB_i_04: row-pass coefficients for the rows at block offsets 0 and 64 */
00062 16384, 21407, -16384, -21407, 22725, 19266, -22725, -12873,
00063 8867, 16384, 8867, 16384, 4520, 12873, -4520, 19266,
00064 16384, -8867, 16384, -8867, 12873, -22725, 19266, -22725,
00065 21407, -16384, -21407, 16384, 19266, 4520, -12873, 4520,
00066 /* TAB_i_17: row-pass coefficients for the rows at block offsets 16 and 112 */
00067 22725, 29692, -22725, -29692, 31521, 26722, -31521, -17855,
00068 12299, 22725, 12299, 22725, 6270, 17855, -6270, 26722,
00069 22725, -12299, 22725, -12299, 17855, -31521, 26722, -31521,
00070 29692, -22725, -29692, 22725, 26722, 6270, -17855, 6270,
00071 /* TAB_i_26: row-pass coefficients for the rows at block offsets 32 and 96 */
00072 21407, 27969, -21407, -27969, 29692, 25172, -29692, -16819,
00073 11585, 21407, 11585, 21407, 5906, 16819, -5906, 25172,
00074 21407, -11585, 21407, -11585, 16819, -29692, 25172, -29692,
00075 27969, -21407, -27969, 21407, 25172, 5906, -16819, 5906,
00076 /* TAB_i_35: row-pass coefficients for the rows at block offsets 48 and 80 */
00077 19266, 25172, -19266, -25172, 26722, 22654, -26722, -15137,
00078 10426, 19266, 10426, 19266, 5315, 15137, -5315, 22654,
00079 19266, -10426, 19266, -10426, 15137, -26722, 22654, -26722,
00080 25172, -19266, -25172, 19266, 22654, 5315, -15137, 5315,
00081 /* TG_1_16, TG_2_16, TG_3_16, COS_4_16: column-pass multipliers, each repeated 8 times */
00082 TG1, TG1, TG1, TG1, TG1, TG1, TG1, TG1,
00083 TG2, TG2, TG2, TG2, TG2, TG2, TG2, TG2,
00084 TG3, TG3, TG3, TG3, TG3, TG3, TG3, TG3,
00085 CS4, CS4, CS4, CS4, CS4, CS4, CS4, CS4,
00086 /* CLIPMAX: upper bound applied with pminh in PUT and ADD */
00087 255, 255, 255, 255, 255, 255, 255, 255
00088 };
00089
00090
/*
 * Row pass for a single row of the block. Expands to a brace-enclosed
 * sequence of asm statements built from the mmi.h helper macros.
 *
 *   blk    - register holding the base address of the coefficient block
 *   rowoff - offset of the row from blk (the callers step it by 16 per row)
 *   taboff - offset of this row's 64-byte coefficient table in consttable
 *   rnd    - register holding the rounding term (loaded from ROUNDER_0/1)
 *   outreg - register that receives the transformed row
 *
 * Expects $24 to hold the address of consttable. Uses $2 and $16-$20 as
 * scratch, so none of them may hold live data across an expansion.
 *
 * NOTE(review): lq, prevh, phmadh, paddw, ... are defined in mmi.h, which is
 * not visible here. Their use in this file suggests the last operand is the
 * destination register -- confirm against mmi.h before reordering anything.
 */
00091 #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
00092 lq(blk, rowoff, $16); \
00093 /* the four table quadwords go to $17-$20; the loads are interleaved with the arithmetic */ \
00094 lq($24, 0+taboff, $17); \
00095 \
00096 lq($24, 16+taboff, $18); \
00097 prevh($16, $2); \
00098 lq($24, 32+taboff, $19); \
00099 phmadh($17, $16, $17); \
00100 lq($24, 48+taboff, $20); \
00101 phmadh($18, $2, $18); \
00102 phmadh($19, $16, $19); \
00103 phmadh($20, $2, $20); \
00104 paddw($17, $18, $17); \
00105 paddw($19, $20, $19); \
00106 pcpyld($19, $17, $18); \
00107 pcpyud($17, $19, $20); \
00108 paddw($18, rnd, $18); \
00109 paddw($18, $20, $17); \
00110 psubw($18, $20, $20); \
00111 psraw($17, SHIFT_INV_ROW, $17); \
00112 psraw($20, SHIFT_INV_ROW, $20); \
00113 ppach($20, $17, outreg); \
00114 /* last three steps only rearrange outreg, with $2 as temporary */ \
00115 prevh(outreg, $2); \
00116 pcpyud($2, $2, $2); \
00117 pcpyld($2, outreg, outreg); \
00118 }
00119
00120
/*
 * Column pass, performed on whole registers so all columns are handled at once.
 *
 * Inputs:  $8-$15, the row-pass results for the rows at block offsets
 *          0, 16, ..., 112 (that is how the functions below assign outreg),
 *          and $24 holding the address of consttable.
 * Outputs: $16-$19 and $20-$23. DCT_8_INV_COL8_STORE and DCT_8_INV_COL8_PMS
 *          combine them pairwise: $16 with $20, $17 with $21, $18 with $22,
 *          $19 with $23.
 * Scratch: $2 (the multiplier line currently loaded) and $3.
 *
 * Every multiplication is the same five-step group: pmulth, psraw by 15,
 * pmfhl_uw into $3, psraw by 15, pinteh.
 * NOTE(review): this looks like a 16x16 multiply whose products are scaled
 * down by 2^15 and merged back into halfwords -- confirm against the EE
 * instruction manual and mmi.h (neither is visible here).
 *
 * Unlike DCT_8_INV_ROW1 this macro is not wrapped in braces and already ends
 * in a semicolon: it expands to many statements and is not safe as the body
 * of an unbraced if/else.
 */
00121 #define DCT_8_INV_COL8() \
00122 /* odd rows 3 ($11) and 5 ($13) with the TG_3_16 multiplier -> $17, $18 */ \
00123 lq($24, TG_3_16, $2); \
00124 \
00125 pmulth($11, $2, $17); \
00126 psraw($17, 15, $17); \
00127 pmfhl_uw($3); \
00128 psraw($3, 15, $3); \
00129 pinteh($3, $17, $17); \
00130 psubh($17, $13, $17); \
00131 \
00132 pmulth($13, $2, $18); \
00133 psraw($18, 15, $18); \
00134 pmfhl_uw($3); \
00135 psraw($3, 15, $3); \
00136 pinteh($3, $18, $18); \
00137 paddh($18, $11, $18); \
00138 /* odd rows 7 ($15) and 1 ($9) with the TG_1_16 multiplier -> $19, $20 */ \
00139 lq($24, TG_1_16, $2); \
00140 \
00141 pmulth($15, $2, $19); \
00142 psraw($19, 15, $19); \
00143 pmfhl_uw($3); \
00144 psraw($3, 15, $3); \
00145 pinteh($3, $19, $19); \
00146 paddh($19, $9, $19); \
00147 \
00148 pmulth($9, $2, $20); \
00149 psraw($20, 15, $20); \
00150 pmfhl_uw($3); \
00151 psraw($3, 15, $3); \
00152 pinteh($3, $20, $20); \
00153 psubh($20, $15, $20); \
00154 /* sums/differences of the four odd terms; $20 and $23 are not written again below */ \
00155 psubh($19, $18, $3); \
00156 paddh($20, $17, $16); \
00157 psubh($20, $17, $23); \
00158 paddh($19, $18, $20); \
00159 /* sum and difference of $3 and $16, each multiplied by COS_4_16 -> $21, $22 */ \
00160 lq($24, COS_4_16, $2); \
00161 \
00162 paddh($3, $16, $21); \
00163 psubh($3, $16, $22); \
00164 \
00165 pmulth($21, $2, $21); \
00166 psraw($21, 15, $21); \
00167 pmfhl_uw($3); \
00168 psraw($3, 15, $3); \
00169 pinteh($3, $21, $21); \
00170 \
00171 pmulth($22, $2, $22); \
00172 psraw($22, 15, $22); \
00173 pmfhl_uw($3); \
00174 psraw($3, 15, $3); \
00175 pinteh($3, $22, $22); \
00176 /* even rows 2 ($10) and 6 ($14) with the TG_2_16 multiplier -> $17, $18 */ \
00177 lq($24, TG_2_16, $2); \
00178 \
00179 pmulth($10, $2, $17); \
00180 psraw($17, 15, $17); \
00181 pmfhl_uw($3); \
00182 psraw($3, 15, $3); \
00183 pinteh($3, $17, $17); \
00184 psubh($17, $14, $17); \
00185 \
00186 pmulth($14, $2, $18); \
00187 psraw($18, 15, $18); \
00188 pmfhl_uw($3); \
00189 psraw($3, 15, $3); \
00190 pinteh($3, $18, $18); \
00191 paddh($18, $10, $18); \
00192 /* even rows 0 ($8) and 4 ($12): sum in $2, difference in $3 */ \
00193 paddh($8, $12, $2); \
00194 psubh($8, $12, $3); \
00195 /* combine with $17/$18 to produce the outputs $16-$19 */ \
00196 paddh($2, $18, $16); \
00197 psubh($2, $18, $19); \
00198 psubh($3, $17, $18); \
00199 paddh($3, $17, $17);
00200
00201
/*
 * Last stage of the column pass, writing the result to memory.
 *
 * For each register pair ($16,$20), ($17,$21), ($18,$22), ($19,$23) left by
 * DCT_8_INV_COL8, the sum and the difference are formed, shifted right by
 * SHIFT_INV_COL with psrah and stored with sq:
 *   sums        -> offsets 0, 16, 32, 48 from blk
 *   differences -> offsets 112, 96, 80, 64 from blk
 *
 *   blk - register holding the destination base address
 *
 * Uses $2 and $3 as scratch and overwrites $16-$19. Expands to multiple
 * statements without enclosing braces.
 */
00202 #define DCT_8_INV_COL8_STORE(blk) \
00203 \
00204 paddh($16, $20, $2); \
00205 psubh($16, $20, $16); \
00206 psrah($2, SHIFT_INV_COL, $2); \
00207 psrah($16, SHIFT_INV_COL, $16); \
00208 sq($2, 0, blk); \
00209 sq($16, 112, blk); \
00210 \
00211 paddh($17, $21, $3); \
00212 psubh($17, $21, $17); \
00213 psrah($3, SHIFT_INV_COL, $3); \
00214 psrah($17, SHIFT_INV_COL, $17); \
00215 sq($3, 16, blk); \
00216 sq($17, 96, blk); \
00217 \
00218 paddh($18, $22, $2); \
00219 psubh($18, $22, $18); \
00220 psrah($2, SHIFT_INV_COL, $2); \
00221 psrah($18, SHIFT_INV_COL, $18); \
00222 sq($2, 32, blk); \
00223 sq($18, 80, blk); \
00224 \
00225 paddh($19, $23, $3); \
00226 psubh($19, $23, $19); \
00227 psrah($3, SHIFT_INV_COL, $3); \
00228 psrah($19, SHIFT_INV_COL, $19); \
00229 sq($3, 48, blk); \
00230 sq($19, 64, blk);
00231
00232
00233
/*
 * Register-only variant of the last column-pass stage: the same sums,
 * differences and SHIFT_INV_COL shifts as DCT_8_INV_COL8_STORE, but nothing
 * is stored. On exit the shifted sums are in $16-$19 and the shifted
 * differences in $20-$23 ($20 from the $16/$20 pair, ..., $23 from the
 * $19/$23 pair). $2 and $3 are scratch.
 * DCT_8_INV_COL8_PUT / DCT_8_INV_COL8_ADD consume these eight registers.
 */
00234 #define DCT_8_INV_COL8_PMS() \
00235 paddh($16, $20, $2); \
00236 psubh($16, $20, $20); \
00237 psrah($2, SHIFT_INV_COL, $16); \
00238 psrah($20, SHIFT_INV_COL, $20); \
00239 \
00240 paddh($17, $21, $3); \
00241 psubh($17, $21, $21); \
00242 psrah($3, SHIFT_INV_COL, $17); \
00243 psrah($21, SHIFT_INV_COL, $21); \
00244 \
00245 paddh($18, $22, $2); \
00246 psubh($18, $22, $22); \
00247 psrah($2, SHIFT_INV_COL, $18); \
00248 psrah($22, SHIFT_INV_COL, $22); \
00249 \
00250 paddh($19, $23, $3); \
00251 psubh($19, $23, $23); \
00252 psrah($3, SHIFT_INV_COL, $19); \
00253 psrah($23, SHIFT_INV_COL, $23);
00254
/*
 * Clamp one row of results and write it to the destination.
 *
 *   rs - register holding the row of 16-bit results
 *
 * The row is bounded above by $11 (the caller loads CLIPMAX, all 255, into
 * it beforehand) and below by $0, packed with ppacb and stored through
 * sd3(2, 0, 4); afterwards $4 is advanced by $5.
 * Implicit register contract: $4 = current destination address, $5 = step
 * between rows, $11 = upper clamp, $2 = scratch.
 * NOTE(review): sd3 takes bare register numbers and is defined in mmi.h (not
 * shown); presumably it stores $2 at offset 0 from $4 -- confirm.
 */
00255 #define PUT(rs) \
00256 pminh(rs, $11, $2); \
00257 pmaxh($2, $0, $2); \
00258 ppacb($0, $2, $2); \
00259 sd3(2, 0, 4); \
00260 __asm__ volatile ("add $4, $5, $4");
00261
/*
 * Write all eight result rows with PUT. The register order
 * $16, $17, $18, $19, $23, $22, $21, $20 matches the order in which
 * DCT_8_INV_COL8_STORE lays its results out at offsets 0, 16, ..., 112.
 * Requires DCT_8_INV_COL8_PMS to have run and $11 to hold CLIPMAX.
 */
00262 #define DCT_8_INV_COL8_PUT() \
00263 PUT($16); \
00264 PUT($17); \
00265 PUT($18); \
00266 PUT($19); \
00267 PUT($23); \
00268 PUT($22); \
00269 PUT($21); \
00270 PUT($20);
00271
/*
 * Add one row of results to the data already at the destination.
 *
 *   rs - register holding the row of 16-bit results
 *
 * Loads through ld3(4, 0, 2), widens the loaded bytes with pextlb against
 * $0, adds rs, then clamps, packs, stores and advances $4 by $5 exactly as
 * PUT does.
 * Implicit register contract: $4 = current destination address, $5 = step
 * between rows, $11 = upper clamp (CLIPMAX), $2 = scratch.
 * NOTE(review): ld3/sd3 take bare register numbers and are defined in mmi.h
 * (not shown); presumably they load/store $2 at offset 0 from $4 -- confirm.
 */
00272 #define ADD(rs) \
00273 ld3(4, 0, 2); \
00274 pextlb($0, $2, $2); \
00275 paddh($2, rs, $2); \
00276 pminh($2, $11, $2); \
00277 pmaxh($2, $0, $2); \
00278 ppacb($0, $2, $2); \
00279 sd3(2, 0, 4); \
00280 __asm__ volatile ("add $4, $5, $4");
00281
00282
/*
 * Accumulate all eight result rows into the destination with ADD, in the
 * same register order as DCT_8_INV_COL8_PUT
 * ($16, $17, $18, $19, $23, $22, $21, $20).
 * Requires DCT_8_INV_COL8_PMS to have run and $11 to hold CLIPMAX.
 */
00283 #define DCT_8_INV_COL8_ADD() \
00284 ADD($16); \
00285 ADD($17); \
00286 ADD($18); \
00287 ADD($19); \
00288 ADD($23); \
00289 ADD($22); \
00290 ADD($21); \
00291 ADD($20);
00292
00293
/**
 * In-place 8x8 inverse DCT.
 *
 * Eight rows of 16 bytes are read from the block, transformed row by row
 * into $8-$15, run through the column pass and written back to the same
 * offsets.
 *
 * @param block coefficients to transform; overwritten with the result
 *
 * NOTE(review): the asm reads and writes the block through $4 directly
 * rather than through an operand constraint, so it depends on block still
 * being in $4 (the first MIPS argument register) -- nothing here enforces it.
 */
00294 void ff_mmi_idct(int16_t * block)
00295 {
00296 /* $24 = table base, $8 = rounder for the first row, $7 = rounder for the rest */
00297 __asm__ volatile("la $24, %0"::"m"(consttable[0]));
00298 lq($24, ROUNDER_0, $8);
00299 lq($24, ROUNDER_1, $7);
00300 DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
00301 DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
00302 DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
00303 DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
00304 DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
00305 DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
00306 DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
00307 DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
00308 DCT_8_INV_COL8();
00309 DCT_8_INV_COL8_STORE($4);
00310
00311 /* empty asm: its clobber list declares $16-$23, used by the macros above, as modified */
00312 __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00313 }
00314
00315
/**
 * 8x8 inverse DCT of block, with the clamped result written to dest.
 *
 * The rows are transformed as in ff_mmi_idct, but instead of being stored
 * back into the block each result row is bounded to [0, 255], packed and
 * stored at dest, which is advanced by line_size after every row (see PUT).
 *
 * @param dest      destination of the eight output rows
 * @param line_size distance added to the destination address after each row
 * @param block     input coefficients; not written by this function
 *
 * NOTE(review): the asm uses $4, $5 and $6 directly for dest, line_size and
 * block, so it depends on the three arguments still being in the first
 * three MIPS argument registers -- nothing here enforces it.
 */
00316 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
00317 {
00318 /* $24 = table base, $8 = rounder for the first row, $7 = rounder for the rest */
00319 __asm__ volatile("la $24, %0"::"m"(consttable[0]));
00320 lq($24, ROUNDER_0, $8);
00321 lq($24, ROUNDER_1, $7);
00322 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
00323 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
00324 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
00325 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
00326 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
00327 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
00328 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
00329 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
00330 DCT_8_INV_COL8();
00331 lq($24, CLIPMAX, $11); /* $11 is reused as the clamp once the column pass no longer needs it */
00332 DCT_8_INV_COL8_PMS();
00333 DCT_8_INV_COL8_PUT();
00334
00335 /* empty asm: its clobber list declares $16-$23, used by the macros above, as modified */
00336 __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00337 }
00338
00339
/**
 * 8x8 inverse DCT of block, with the result added to the data at dest.
 *
 * The rows are transformed as in ff_mmi_idct; each result row is then added
 * to the bytes already at dest, bounded to [0, 255], packed and stored
 * back, and dest is advanced by line_size after every row (see ADD).
 *
 * @param dest      data to accumulate into, updated in place
 * @param line_size distance added to the destination address after each row
 * @param block     input coefficients; not written by this function
 *
 * NOTE(review): the asm uses $4, $5 and $6 directly for dest, line_size and
 * block, so it depends on the three arguments still being in the first
 * three MIPS argument registers -- nothing here enforces it.
 */
00340 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
00341 {
00342 /* $24 = table base, $8 = rounder for the first row, $7 = rounder for the rest */
00343 __asm__ volatile("la $24, %0"::"m"(consttable[0]));
00344 lq($24, ROUNDER_0, $8);
00345 lq($24, ROUNDER_1, $7);
00346 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
00347 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
00348 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
00349 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
00350 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
00351 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
00352 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
00353 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
00354 DCT_8_INV_COL8();
00355 lq($24, CLIPMAX, $11); /* $11 is reused as the clamp once the column pass no longer needs it */
00356 DCT_8_INV_COL8_PMS();
00357 DCT_8_INV_COL8_ADD();
00358
00359 /* empty asm: its clobber list declares $16-$23, used by the macros above, as modified */
00360 __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00361 }