00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include "config.h"
00051 #include "libavcodec/avcodec.h"
00052 #include "libavcodec/fmtconvert.h"
00053
00054 #if HAVE_INLINE_ASM
00055 #if HAVE_MIPSDSPR1
00056 static void float_to_int16_mips(int16_t *dst, const float *src, long len)
00057 {
00058 const float *src_end = src + len;
00059 int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
00060 float src0, src1, src2, src3, src4, src5, src6, src7;
00061
00062
00063
00064
00065 __asm__ volatile(
00066 "beq %[len], $zero, fti16_end%= \n\t"
00067 "fti16_lp%=: \n\t"
00068 "lwc1 %[src0], 0(%[src]) \n\t"
00069 "lwc1 %[src1], 4(%[src]) \n\t"
00070 "lwc1 %[src2], 8(%[src]) \n\t"
00071 "lwc1 %[src3], 12(%[src]) \n\t"
00072 "cvt.w.s %[src0], %[src0] \n\t"
00073 "cvt.w.s %[src1], %[src1] \n\t"
00074 "cvt.w.s %[src2], %[src2] \n\t"
00075 "cvt.w.s %[src3], %[src3] \n\t"
00076 "mfc1 %[ret0], %[src0] \n\t"
00077 "mfc1 %[ret1], %[src1] \n\t"
00078 "mfc1 %[ret2], %[src2] \n\t"
00079 "mfc1 %[ret3], %[src3] \n\t"
00080 "lwc1 %[src4], 16(%[src]) \n\t"
00081 "lwc1 %[src5], 20(%[src]) \n\t"
00082 "lwc1 %[src6], 24(%[src]) \n\t"
00083 "lwc1 %[src7], 28(%[src]) \n\t"
00084 "cvt.w.s %[src4], %[src4] \n\t"
00085 "cvt.w.s %[src5], %[src5] \n\t"
00086 "cvt.w.s %[src6], %[src6] \n\t"
00087 "cvt.w.s %[src7], %[src7] \n\t"
00088 "addiu %[src], 32 \n\t"
00089 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00090 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00091 "shll_s.w %[ret2], %[ret2], 16 \n\t"
00092 "shll_s.w %[ret3], %[ret3], 16 \n\t"
00093 "srl %[ret0], %[ret0], 16 \n\t"
00094 "srl %[ret1], %[ret1], 16 \n\t"
00095 "srl %[ret2], %[ret2], 16 \n\t"
00096 "srl %[ret3], %[ret3], 16 \n\t"
00097 "sh %[ret0], 0(%[dst]) \n\t"
00098 "sh %[ret1], 2(%[dst]) \n\t"
00099 "sh %[ret2], 4(%[dst]) \n\t"
00100 "sh %[ret3], 6(%[dst]) \n\t"
00101 "mfc1 %[ret4], %[src4] \n\t"
00102 "mfc1 %[ret5], %[src5] \n\t"
00103 "mfc1 %[ret6], %[src6] \n\t"
00104 "mfc1 %[ret7], %[src7] \n\t"
00105 "shll_s.w %[ret4], %[ret4], 16 \n\t"
00106 "shll_s.w %[ret5], %[ret5], 16 \n\t"
00107 "shll_s.w %[ret6], %[ret6], 16 \n\t"
00108 "shll_s.w %[ret7], %[ret7], 16 \n\t"
00109 "srl %[ret4], %[ret4], 16 \n\t"
00110 "srl %[ret5], %[ret5], 16 \n\t"
00111 "srl %[ret6], %[ret6], 16 \n\t"
00112 "srl %[ret7], %[ret7], 16 \n\t"
00113 "sh %[ret4], 8(%[dst]) \n\t"
00114 "sh %[ret5], 10(%[dst]) \n\t"
00115 "sh %[ret6], 12(%[dst]) \n\t"
00116 "sh %[ret7], 14(%[dst]) \n\t"
00117 "addiu %[dst], 16 \n\t"
00118 "bne %[src], %[src_end], fti16_lp%= \n\t"
00119 "fti16_end%=: \n\t"
00120 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
00121 [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
00122 [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
00123 [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
00124 [src]"+r"(src), [dst]"+r"(dst)
00125 : [src_end]"r"(src_end), [len]"r"(len)
00126 : "memory"
00127 );
00128 }
00129
00130 static void float_to_int16_interleave_mips(int16_t *dst, const float **src, long len,
00131 int channels)
00132 {
00133 int c, ch2 = channels <<1;
00134 int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
00135 float src0, src1, src2, src3, src4, src5, src6, src7;
00136 int16_t *dst_ptr0, *dst_ptr1, *dst_ptr2, *dst_ptr3;
00137 int16_t *dst_ptr4, *dst_ptr5, *dst_ptr6, *dst_ptr7;
00138 const float *src_ptr, *src_ptr2, *src_end;
00139
00140 if (channels == 2) {
00141 src_ptr = &src[0][0];
00142 src_ptr2 = &src[1][0];
00143 src_end = src_ptr + len;
00144
00145 __asm__ volatile (
00146 "fti16i2_lp%=: \n\t"
00147 "lwc1 %[src0], 0(%[src_ptr]) \n\t"
00148 "lwc1 %[src1], 0(%[src_ptr2]) \n\t"
00149 "addiu %[src_ptr], 4 \n\t"
00150 "cvt.w.s $f9, %[src0] \n\t"
00151 "cvt.w.s $f10, %[src1] \n\t"
00152 "mfc1 %[ret0], $f9 \n\t"
00153 "mfc1 %[ret1], $f10 \n\t"
00154 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00155 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00156 "addiu %[src_ptr2], 4 \n\t"
00157 "srl %[ret0], %[ret0], 16 \n\t"
00158 "srl %[ret1], %[ret1], 16 \n\t"
00159 "sh %[ret0], 0(%[dst]) \n\t"
00160 "sh %[ret1], 2(%[dst]) \n\t"
00161 "addiu %[dst], 4 \n\t"
00162 "bne %[src_ptr], %[src_end], fti16i2_lp%= \n\t"
00163 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1),
00164 [src0]"=&f"(src0), [src1]"=&f"(src1),
00165 [src_ptr]"+r"(src_ptr), [src_ptr2]"+r"(src_ptr2),
00166 [dst]"+r"(dst)
00167 : [src_end]"r"(src_end)
00168 : "memory"
00169 );
00170 } else {
00171 for (c = 0; c < channels; c++) {
00172 src_ptr = &src[c][0];
00173 dst_ptr0 = &dst[c];
00174 src_end = src_ptr + len;
00175
00176
00177
00178 __asm__ volatile(
00179 "fti16i_lp%=: \n\t"
00180 "lwc1 %[src0], 0(%[src_ptr]) \n\t"
00181 "lwc1 %[src1], 4(%[src_ptr]) \n\t"
00182 "lwc1 %[src2], 8(%[src_ptr]) \n\t"
00183 "lwc1 %[src3], 12(%[src_ptr]) \n\t"
00184 "cvt.w.s %[src0], %[src0] \n\t"
00185 "cvt.w.s %[src1], %[src1] \n\t"
00186 "cvt.w.s %[src2], %[src2] \n\t"
00187 "cvt.w.s %[src3], %[src3] \n\t"
00188 "mfc1 %[ret0], %[src0] \n\t"
00189 "mfc1 %[ret1], %[src1] \n\t"
00190 "mfc1 %[ret2], %[src2] \n\t"
00191 "mfc1 %[ret3], %[src3] \n\t"
00192 "lwc1 %[src4], 16(%[src_ptr]) \n\t"
00193 "lwc1 %[src5], 20(%[src_ptr]) \n\t"
00194 "lwc1 %[src6], 24(%[src_ptr]) \n\t"
00195 "lwc1 %[src7], 28(%[src_ptr]) \n\t"
00196 "addu %[dst_ptr1], %[dst_ptr0], %[ch2] \n\t"
00197 "addu %[dst_ptr2], %[dst_ptr1], %[ch2] \n\t"
00198 "addu %[dst_ptr3], %[dst_ptr2], %[ch2] \n\t"
00199 "addu %[dst_ptr4], %[dst_ptr3], %[ch2] \n\t"
00200 "addu %[dst_ptr5], %[dst_ptr4], %[ch2] \n\t"
00201 "addu %[dst_ptr6], %[dst_ptr5], %[ch2] \n\t"
00202 "addu %[dst_ptr7], %[dst_ptr6], %[ch2] \n\t"
00203 "addiu %[src_ptr], 32 \n\t"
00204 "cvt.w.s %[src4], %[src4] \n\t"
00205 "cvt.w.s %[src5], %[src5] \n\t"
00206 "cvt.w.s %[src6], %[src6] \n\t"
00207 "cvt.w.s %[src7], %[src7] \n\t"
00208 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00209 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00210 "shll_s.w %[ret2], %[ret2], 16 \n\t"
00211 "shll_s.w %[ret3], %[ret3], 16 \n\t"
00212 "srl %[ret0], %[ret0], 16 \n\t"
00213 "srl %[ret1], %[ret1], 16 \n\t"
00214 "srl %[ret2], %[ret2], 16 \n\t"
00215 "srl %[ret3], %[ret3], 16 \n\t"
00216 "sh %[ret0], 0(%[dst_ptr0]) \n\t"
00217 "sh %[ret1], 0(%[dst_ptr1]) \n\t"
00218 "sh %[ret2], 0(%[dst_ptr2]) \n\t"
00219 "sh %[ret3], 0(%[dst_ptr3]) \n\t"
00220 "mfc1 %[ret4], %[src4] \n\t"
00221 "mfc1 %[ret5], %[src5] \n\t"
00222 "mfc1 %[ret6], %[src6] \n\t"
00223 "mfc1 %[ret7], %[src7] \n\t"
00224 "shll_s.w %[ret4], %[ret4], 16 \n\t"
00225 "shll_s.w %[ret5], %[ret5], 16 \n\t"
00226 "shll_s.w %[ret6], %[ret6], 16 \n\t"
00227 "shll_s.w %[ret7], %[ret7], 16 \n\t"
00228 "srl %[ret4], %[ret4], 16 \n\t"
00229 "srl %[ret5], %[ret5], 16 \n\t"
00230 "srl %[ret6], %[ret6], 16 \n\t"
00231 "srl %[ret7], %[ret7], 16 \n\t"
00232 "sh %[ret4], 0(%[dst_ptr4]) \n\t"
00233 "sh %[ret5], 0(%[dst_ptr5]) \n\t"
00234 "sh %[ret6], 0(%[dst_ptr6]) \n\t"
00235 "sh %[ret7], 0(%[dst_ptr7]) \n\t"
00236 "addu %[dst_ptr0], %[dst_ptr7], %[ch2] \n\t"
00237 "bne %[src_ptr], %[src_end], fti16i_lp%= \n\t"
00238 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
00239 [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
00240 [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
00241 [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
00242 [dst_ptr1]"=&r"(dst_ptr1), [dst_ptr2]"=&r"(dst_ptr2), [dst_ptr3]"=&r"(dst_ptr3),
00243 [dst_ptr4]"=&r"(dst_ptr4), [dst_ptr5]"=&r"(dst_ptr5), [dst_ptr6]"=&r"(dst_ptr6),
00244 [dst_ptr7]"=&r"(dst_ptr7), [dst_ptr0]"+r"(dst_ptr0), [src_ptr]"+r"(src_ptr)
00245 : [ch2]"r"(ch2), [src_end]"r"(src_end)
00246 : "memory"
00247 );
00248 }
00249 }
00250 }
00251 #endif
00252
00253 static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
00254 float mul, int len)
00255 {
00256
00257
00258
00259 float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
00260
00261 int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
00262 const int *src_end = src + len;
00263
00264
00265
00266 __asm__ volatile (
00267 "i32tf_lp%=: \n\t"
00268 "lw %[rpom11], 0(%[src]) \n\t"
00269 "lw %[rpom21], 4(%[src]) \n\t"
00270 "lw %[rpom1], 8(%[src]) \n\t"
00271 "lw %[rpom2], 12(%[src]) \n\t"
00272 "mtc1 %[rpom11], %[temp1] \n\t"
00273 "mtc1 %[rpom21], %[temp3] \n\t"
00274 "mtc1 %[rpom1], %[temp5] \n\t"
00275 "mtc1 %[rpom2], %[temp7] \n\t"
00276
00277 "lw %[rpom13], 16(%[src]) \n\t"
00278 "lw %[rpom23], 20(%[src]) \n\t"
00279 "lw %[rpom12], 24(%[src]) \n\t"
00280 "lw %[rpom22], 28(%[src]) \n\t"
00281 "mtc1 %[rpom13], %[temp9] \n\t"
00282 "mtc1 %[rpom23], %[temp11] \n\t"
00283 "mtc1 %[rpom12], %[temp13] \n\t"
00284 "mtc1 %[rpom22], %[temp15] \n\t"
00285
00286 "addiu %[src], 32 \n\t"
00287 "cvt.s.w %[temp1], %[temp1] \n\t"
00288 "cvt.s.w %[temp3], %[temp3] \n\t"
00289 "cvt.s.w %[temp5], %[temp5] \n\t"
00290 "cvt.s.w %[temp7], %[temp7] \n\t"
00291
00292 "cvt.s.w %[temp9], %[temp9] \n\t"
00293 "cvt.s.w %[temp11], %[temp11] \n\t"
00294 "cvt.s.w %[temp13], %[temp13] \n\t"
00295 "cvt.s.w %[temp15], %[temp15] \n\t"
00296
00297 "mul.s %[temp1], %[temp1], %[mul] \n\t"
00298 "mul.s %[temp3], %[temp3], %[mul] \n\t"
00299 "mul.s %[temp5], %[temp5], %[mul] \n\t"
00300 "mul.s %[temp7], %[temp7], %[mul] \n\t"
00301
00302 "mul.s %[temp9], %[temp9], %[mul] \n\t"
00303 "mul.s %[temp11], %[temp11], %[mul] \n\t"
00304 "mul.s %[temp13], %[temp13], %[mul] \n\t"
00305 "mul.s %[temp15], %[temp15], %[mul] \n\t"
00306
00307 "swc1 %[temp1], 0(%[dst]) \n\t"
00308 "swc1 %[temp3], 4(%[dst]) \n\t"
00309 "swc1 %[temp5], 8(%[dst]) \n\t"
00310 "swc1 %[temp7], 12(%[dst]) \n\t"
00311
00312 "swc1 %[temp9], 16(%[dst]) \n\t"
00313 "swc1 %[temp11], 20(%[dst]) \n\t"
00314 "swc1 %[temp13], 24(%[dst]) \n\t"
00315 "swc1 %[temp15], 28(%[dst]) \n\t"
00316 "addiu %[dst], 32 \n\t"
00317 "bne %[src], %[src_end], i32tf_lp%= \n\t"
00318 : [temp1]"=&f"(temp1), [temp11]"=&f"(temp11),
00319 [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
00320 [temp3]"=&f"(temp3), [temp5]"=&f"(temp5),
00321 [temp7]"=&f"(temp7), [temp9]"=&f"(temp9),
00322 [rpom1]"=&r"(rpom1), [rpom2]"=&r"(rpom2),
00323 [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
00324 [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
00325 [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
00326 [dst]"+r"(dst), [src]"+r"(src)
00327 : [mul]"f"(mul), [src_end]"r"(src_end)
00328 : "memory"
00329 );
00330 }
00331 #endif
00332
00333 av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c)
00334 {
00335 #if HAVE_INLINE_ASM
00336 #if HAVE_MIPSDSPR1
00337 c->float_to_int16_interleave = float_to_int16_interleave_mips;
00338 c->float_to_int16 = float_to_int16_mips;
00339 #endif
00340 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
00341 #endif
00342 }