00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00056 #include "config.h"
00057 #include "libavcodec/ac3dsp.h"
00058 #include "libavcodec/ac3.h"
00059
00060
00061 #if HAVE_INLINE_ASM
00062 #if HAVE_MIPSDSPR1
00063 static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
00064 int start, int end,
00065 int snr_offset, int floor,
00066 const uint8_t *bap_tab, uint8_t *bap)
00067 {
00068 int band, band_end, cond;
00069 int m, address1, address2;
00070 int16_t *psd1, *psd_end;
00071 uint8_t *bap1;
00072
00073 if (snr_offset == -960) {
00074 memset(bap, 0, AC3_MAX_COEFS);
00075 return;
00076 }
00077
00078 psd1 = &psd[start];
00079 bap1 = &bap[start];
00080 band = ff_ac3_bin_to_band_tab[start];
00081
00082 do {
00083 m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
00084 band_end = ff_ac3_band_start_tab[++band];
00085 band_end = FFMIN(band_end, end);
00086 psd_end = psd + band_end - 1;
00087
00088 __asm__ volatile (
00089 "slt %[cond], %[psd1], %[psd_end] \n\t"
00090 "beqz %[cond], 1f \n\t"
00091 "2: \n\t"
00092 "lh %[address1], 0(%[psd1]) \n\t"
00093 "lh %[address2], 2(%[psd1]) \n\t"
00094 "addiu %[psd1], %[psd1], 4 \n\t"
00095 "subu %[address1], %[address1], %[m] \n\t"
00096 "sra %[address1], %[address1], 5 \n\t"
00097 "addiu %[address1], %[address1], -32 \n\t"
00098 "shll_s.w %[address1], %[address1], 26 \n\t"
00099 "subu %[address2], %[address2], %[m] \n\t"
00100 "sra %[address2], %[address2], 5 \n\t"
00101 "sra %[address1], %[address1], 26 \n\t"
00102 "addiu %[address1], %[address1], 32 \n\t"
00103 "lbux %[address1], %[address1](%[bap_tab]) \n\t"
00104 "addiu %[address2], %[address2], -32 \n\t"
00105 "shll_s.w %[address2], %[address2], 26 \n\t"
00106 "sb %[address1], 0(%[bap1]) \n\t"
00107 "slt %[cond], %[psd1], %[psd_end] \n\t"
00108 "sra %[address2], %[address2], 26 \n\t"
00109 "addiu %[address2], %[address2], 32 \n\t"
00110 "lbux %[address2], %[address2](%[bap_tab]) \n\t"
00111 "sb %[address2], 1(%[bap1]) \n\t"
00112 "addiu %[bap1], %[bap1], 2 \n\t"
00113 "bnez %[cond], 2b \n\t"
00114 "addiu %[psd_end], %[psd_end], 2 \n\t"
00115 "slt %[cond], %[psd1], %[psd_end] \n\t"
00116 "beqz %[cond], 3f \n\t"
00117 "1: \n\t"
00118 "lh %[address1], 0(%[psd1]) \n\t"
00119 "addiu %[psd1], %[psd1], 2 \n\t"
00120 "subu %[address1], %[address1], %[m] \n\t"
00121 "sra %[address1], %[address1], 5 \n\t"
00122 "addiu %[address1], %[address1], -32 \n\t"
00123 "shll_s.w %[address1], %[address1], 26 \n\t"
00124 "sra %[address1], %[address1], 26 \n\t"
00125 "addiu %[address1], %[address1], 32 \n\t"
00126 "lbux %[address1], %[address1](%[bap_tab]) \n\t"
00127 "sb %[address1], 0(%[bap1]) \n\t"
00128 "addiu %[bap1], %[bap1], 1 \n\t"
00129 "3: \n\t"
00130
00131 : [address1]"=&r"(address1), [address2]"=&r"(address2),
00132 [cond]"=&r"(cond), [bap1]"+r"(bap1),
00133 [psd1]"+r"(psd1), [psd_end]"+r"(psd_end)
00134 : [m]"r"(m), [bap_tab]"r"(bap_tab)
00135 : "memory"
00136 );
00137 } while (end > band_end);
00138 }
00139
00140 static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
00141 int len)
00142 {
00143 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
00144
00145 __asm__ volatile (
00146 "andi %[temp3], %[len], 3 \n\t"
00147 "addu %[temp2], %[bap], %[len] \n\t"
00148 "addu %[temp4], %[bap], %[temp3] \n\t"
00149 "beq %[temp2], %[temp4], 4f \n\t"
00150 "1: \n\t"
00151 "lbu %[temp0], -1(%[temp2]) \n\t"
00152 "lbu %[temp5], -2(%[temp2]) \n\t"
00153 "lbu %[temp6], -3(%[temp2]) \n\t"
00154 "sll %[temp0], %[temp0], 1 \n\t"
00155 "addu %[temp0], %[mant_cnt], %[temp0] \n\t"
00156 "sll %[temp5], %[temp5], 1 \n\t"
00157 "addu %[temp5], %[mant_cnt], %[temp5] \n\t"
00158 "lhu %[temp1], 0(%[temp0]) \n\t"
00159 "sll %[temp6], %[temp6], 1 \n\t"
00160 "addu %[temp6], %[mant_cnt], %[temp6] \n\t"
00161 "addiu %[temp1], %[temp1], 1 \n\t"
00162 "sh %[temp1], 0(%[temp0]) \n\t"
00163 "lhu %[temp1], 0(%[temp5]) \n\t"
00164 "lbu %[temp7], -4(%[temp2]) \n\t"
00165 "addiu %[temp2], %[temp2], -4 \n\t"
00166 "addiu %[temp1], %[temp1], 1 \n\t"
00167 "sh %[temp1], 0(%[temp5]) \n\t"
00168 "lhu %[temp1], 0(%[temp6]) \n\t"
00169 "sll %[temp7], %[temp7], 1 \n\t"
00170 "addu %[temp7], %[mant_cnt], %[temp7] \n\t"
00171 "addiu %[temp1], %[temp1],1 \n\t"
00172 "sh %[temp1], 0(%[temp6]) \n\t"
00173 "lhu %[temp1], 0(%[temp7]) \n\t"
00174 "addiu %[temp1], %[temp1], 1 \n\t"
00175 "sh %[temp1], 0(%[temp7]) \n\t"
00176 "bne %[temp2], %[temp4], 1b \n\t"
00177 "4: \n\t"
00178 "beqz %[temp3], 2f \n\t"
00179 "3: \n\t"
00180 "addiu %[temp3], %[temp3], -1 \n\t"
00181 "lbu %[temp0], -1(%[temp2]) \n\t"
00182 "addiu %[temp2], %[temp2], -1 \n\t"
00183 "sll %[temp0], %[temp0], 1 \n\t"
00184 "addu %[temp0], %[mant_cnt], %[temp0] \n\t"
00185 "lhu %[temp1], 0(%[temp0]) \n\t"
00186 "addiu %[temp1], %[temp1], 1 \n\t"
00187 "sh %[temp1], 0(%[temp0]) \n\t"
00188 "bgtz %[temp3], 3b \n\t"
00189 "2: \n\t"
00190
00191 : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
00192 [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
00193 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
00194 [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
00195 : [len] "r" (len), [bap] "r" (bap),
00196 [mant_cnt] "r" (mant_cnt)
00197 : "memory"
00198 );
00199 }
00200 #endif
00201
00202 #if HAVE_MIPSFPU && HAVE_MIPS32R2
00203 static void float_to_fixed24_mips(int32_t *dst, const float *src, unsigned int len)
00204 {
00205 const float scale = 1 << 24;
00206 float src0, src1, src2, src3, src4, src5, src6, src7;
00207 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
00208
00209 do {
00210 __asm__ volatile (
00211 "lwc1 %[src0], 0(%[src]) \n\t"
00212 "lwc1 %[src1], 4(%[src]) \n\t"
00213 "lwc1 %[src2], 8(%[src]) \n\t"
00214 "lwc1 %[src3], 12(%[src]) \n\t"
00215 "lwc1 %[src4], 16(%[src]) \n\t"
00216 "lwc1 %[src5], 20(%[src]) \n\t"
00217 "lwc1 %[src6], 24(%[src]) \n\t"
00218 "lwc1 %[src7], 28(%[src]) \n\t"
00219 "mul.s %[src0], %[src0], %[scale] \n\t"
00220 "mul.s %[src1], %[src1], %[scale] \n\t"
00221 "mul.s %[src2], %[src2], %[scale] \n\t"
00222 "mul.s %[src3], %[src3], %[scale] \n\t"
00223 "mul.s %[src4], %[src4], %[scale] \n\t"
00224 "mul.s %[src5], %[src5], %[scale] \n\t"
00225 "mul.s %[src6], %[src6], %[scale] \n\t"
00226 "mul.s %[src7], %[src7], %[scale] \n\t"
00227 "cvt.w.s %[src0], %[src0] \n\t"
00228 "cvt.w.s %[src1], %[src1] \n\t"
00229 "cvt.w.s %[src2], %[src2] \n\t"
00230 "cvt.w.s %[src3], %[src3] \n\t"
00231 "cvt.w.s %[src4], %[src4] \n\t"
00232 "cvt.w.s %[src5], %[src5] \n\t"
00233 "cvt.w.s %[src6], %[src6] \n\t"
00234 "cvt.w.s %[src7], %[src7] \n\t"
00235 "mfc1 %[temp0], %[src0] \n\t"
00236 "mfc1 %[temp1], %[src1] \n\t"
00237 "mfc1 %[temp2], %[src2] \n\t"
00238 "mfc1 %[temp3], %[src3] \n\t"
00239 "mfc1 %[temp4], %[src4] \n\t"
00240 "mfc1 %[temp5], %[src5] \n\t"
00241 "mfc1 %[temp6], %[src6] \n\t"
00242 "mfc1 %[temp7], %[src7] \n\t"
00243 "sw %[temp0], 0(%[dst]) \n\t"
00244 "sw %[temp1], 4(%[dst]) \n\t"
00245 "sw %[temp2], 8(%[dst]) \n\t"
00246 "sw %[temp3], 12(%[dst]) \n\t"
00247 "sw %[temp4], 16(%[dst]) \n\t"
00248 "sw %[temp5], 20(%[dst]) \n\t"
00249 "sw %[temp6], 24(%[dst]) \n\t"
00250 "sw %[temp7], 28(%[dst]) \n\t"
00251
00252 : [dst] "+r" (dst), [src] "+r" (src),
00253 [src0] "=&f" (src0), [src1] "=&f" (src1),
00254 [src2] "=&f" (src2), [src3] "=&f" (src3),
00255 [src4] "=&f" (src4), [src5] "=&f" (src5),
00256 [src6] "=&f" (src6), [src7] "=&f" (src7),
00257 [temp0] "=r" (temp0), [temp1] "=r" (temp1),
00258 [temp2] "=r" (temp2), [temp3] "=r" (temp3),
00259 [temp4] "=r" (temp4), [temp5] "=r" (temp5),
00260 [temp6] "=r" (temp6), [temp7] "=r" (temp7)
00261 : [scale] "f" (scale)
00262 : "memory"
00263 );
00264 src = src + 8;
00265 dst = dst + 8;
00266 len -= 8;
00267 } while (len > 0);
00268 }
00269
00270 static void ac3_downmix_mips(float **samples, float (*matrix)[2],
00271 int out_ch, int in_ch, int len)
00272 {
00273 int i, j, i1, i2, i3;
00274 float v0, v1, v2, v3;
00275 float v4, v5, v6, v7;
00276 float samples0, samples1, samples2, samples3, matrix_j, matrix_j2;
00277 float *samples_p,*matrix_p, **samples_x, **samples_end, **samples_sw;
00278
00279 __asm__ volatile(
00280 ".set push \n\t"
00281 ".set noreorder \n\t"
00282
00283 "li %[i1], 2 \n\t"
00284 "sll %[len], 2 \n\t"
00285 "move %[i], $zero \n\t"
00286 "sll %[j], %[in_ch], 2 \n\t"
00287
00288 "bne %[out_ch], %[i1], 3f \n\t"
00289 " li %[i2], 1 \n\t"
00290
00291 "2: \n\t"
00292 "move %[matrix_p], %[matrix] \n\t"
00293 "move %[samples_x], %[samples] \n\t"
00294 "mtc1 $zero, %[v0] \n\t"
00295 "mtc1 $zero, %[v1] \n\t"
00296 "mtc1 $zero, %[v2] \n\t"
00297 "mtc1 $zero, %[v3] \n\t"
00298 "mtc1 $zero, %[v4] \n\t"
00299 "mtc1 $zero, %[v5] \n\t"
00300 "mtc1 $zero, %[v6] \n\t"
00301 "mtc1 $zero, %[v7] \n\t"
00302 "addiu %[i1], %[i], 4 \n\t"
00303 "addiu %[i2], %[i], 8 \n\t"
00304 "lw %[samples_p], 0(%[samples_x]) \n\t"
00305 "addiu %[i3], %[i], 12 \n\t"
00306 "addu %[samples_end], %[samples_x], %[j] \n\t"
00307 "move %[samples_sw], %[samples_p] \n\t"
00308
00309 "1: \n\t"
00310 "lwc1 %[matrix_j], 0(%[matrix_p]) \n\t"
00311 "lwc1 %[matrix_j2], 4(%[matrix_p]) \n\t"
00312 "lwxc1 %[samples0], %[i](%[samples_p]) \n\t"
00313 "lwxc1 %[samples1], %[i1](%[samples_p]) \n\t"
00314 "lwxc1 %[samples2], %[i2](%[samples_p]) \n\t"
00315 "lwxc1 %[samples3], %[i3](%[samples_p]) \n\t"
00316 "addiu %[matrix_p], 8 \n\t"
00317 "addiu %[samples_x], 4 \n\t"
00318 "madd.s %[v0], %[v0], %[samples0], %[matrix_j] \n\t"
00319 "madd.s %[v1], %[v1], %[samples1], %[matrix_j] \n\t"
00320 "madd.s %[v2], %[v2], %[samples2], %[matrix_j] \n\t"
00321 "madd.s %[v3], %[v3], %[samples3], %[matrix_j] \n\t"
00322 "madd.s %[v4], %[v4], %[samples0], %[matrix_j2]\n\t"
00323 "madd.s %[v5], %[v5], %[samples1], %[matrix_j2]\n\t"
00324 "madd.s %[v6], %[v6], %[samples2], %[matrix_j2]\n\t"
00325 "madd.s %[v7], %[v7], %[samples3], %[matrix_j2]\n\t"
00326 "bne %[samples_x], %[samples_end], 1b \n\t"
00327 " lw %[samples_p], 0(%[samples_x]) \n\t"
00328
00329 "lw %[samples_p], 4(%[samples]) \n\t"
00330 "swxc1 %[v0], %[i](%[samples_sw]) \n\t"
00331 "swxc1 %[v1], %[i1](%[samples_sw]) \n\t"
00332 "swxc1 %[v2], %[i2](%[samples_sw]) \n\t"
00333 "swxc1 %[v3], %[i3](%[samples_sw]) \n\t"
00334 "swxc1 %[v4], %[i](%[samples_p]) \n\t"
00335 "addiu %[i], 16 \n\t"
00336 "swxc1 %[v5], %[i1](%[samples_p]) \n\t"
00337 "swxc1 %[v6], %[i2](%[samples_p]) \n\t"
00338 "bne %[i], %[len], 2b \n\t"
00339 " swxc1 %[v7], %[i3](%[samples_p]) \n\t"
00340
00341 "3: \n\t"
00342 "bne %[out_ch], %[i2], 6f \n\t"
00343 " nop \n\t"
00344
00345 "5: \n\t"
00346 "move %[matrix_p], %[matrix] \n\t"
00347 "move %[samples_x], %[samples] \n\t"
00348 "mtc1 $zero, %[v0] \n\t"
00349 "mtc1 $zero, %[v1] \n\t"
00350 "mtc1 $zero, %[v2] \n\t"
00351 "mtc1 $zero, %[v3] \n\t"
00352 "addiu %[i1], %[i], 4 \n\t"
00353 "addiu %[i2], %[i], 8 \n\t"
00354 "lw %[samples_p], 0(%[samples_x]) \n\t"
00355 "addiu %[i3], %[i], 12 \n\t"
00356 "addu %[samples_end], %[samples_x], %[j] \n\t"
00357 "move %[samples_sw], %[samples_p] \n\t"
00358
00359 "4: \n\t"
00360 "lwc1 %[matrix_j], 0(%[matrix_p]) \n\t"
00361 "lwxc1 %[samples0], %[i](%[samples_p]) \n\t"
00362 "lwxc1 %[samples1], %[i1](%[samples_p]) \n\t"
00363 "lwxc1 %[samples2], %[i2](%[samples_p]) \n\t"
00364 "lwxc1 %[samples3], %[i3](%[samples_p]) \n\t"
00365 "addiu %[matrix_p], 8 \n\t"
00366 "addiu %[samples_x], 4 \n\t"
00367 "madd.s %[v0], %[v0], %[samples0], %[matrix_j] \n\t"
00368 "madd.s %[v1], %[v1], %[samples1], %[matrix_j] \n\t"
00369 "madd.s %[v2], %[v2], %[samples2], %[matrix_j] \n\t"
00370 "madd.s %[v3], %[v3], %[samples3], %[matrix_j] \n\t"
00371 "bne %[samples_x], %[samples_end], 4b \n\t"
00372 " lw %[samples_p], 0(%[samples_x]) \n\t"
00373
00374 "swxc1 %[v0], %[i](%[samples_sw]) \n\t"
00375 "addiu %[i], 16 \n\t"
00376 "swxc1 %[v1], %[i1](%[samples_sw]) \n\t"
00377 "swxc1 %[v2], %[i2](%[samples_sw]) \n\t"
00378 "bne %[i], %[len], 5b \n\t"
00379 " swxc1 %[v3], %[i3](%[samples_sw]) \n\t"
00380 "6: \n\t"
00381
00382 ".set pop"
00383 :[samples_p]"=&r"(samples_p), [matrix_j]"=&f"(matrix_j), [matrix_j2]"=&f"(matrix_j2),
00384 [samples0]"=&f"(samples0), [samples1]"=&f"(samples1),
00385 [samples2]"=&f"(samples2), [samples3]"=&f"(samples3),
00386 [v0]"=&f"(v0), [v1]"=&f"(v1), [v2]"=&f"(v2), [v3]"=&f"(v3),
00387 [v4]"=&f"(v4), [v5]"=&f"(v5), [v6]"=&f"(v6), [v7]"=&f"(v7),
00388 [samples_x]"=&r"(samples_x), [matrix_p]"=&r"(matrix_p),
00389 [samples_end]"=&r"(samples_end), [samples_sw]"=&r"(samples_sw),
00390 [i1]"=&r"(i1), [i2]"=&r"(i2), [i3]"=&r"(i3), [i]"=&r"(i),
00391 [j]"=&r"(j), [len]"+r"(len)
00392 :[samples]"r"(samples), [matrix]"r"(matrix),
00393 [in_ch]"r"(in_ch), [out_ch]"r"(out_ch)
00394 :"memory"
00395 );
00396 }
00397 #endif
00398 #endif
00399
00400 void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact) {
00401 #if HAVE_INLINE_ASM
00402 #if HAVE_MIPSDSPR1
00403 c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_mips;
00404 c->update_bap_counts = ac3_update_bap_counts_mips;
00405 #endif
00406 #if HAVE_MIPSFPU && HAVE_MIPS32R2
00407 c->float_to_fixed24 = float_to_fixed24_mips;
00408 c->downmix = ac3_downmix_mips;
00409 #endif
00410 #endif
00411
00412 }