00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include "config.h"
00051 #include "libavcodec/avcodec.h"
00052 #include "libavcodec/fmtconvert.h"
00053
00054 #if HAVE_MIPSDSPR1
00055 static void float_to_int16_mips(int16_t *dst, const float *src, long len)
00056 {
00057 const float *src_end = src + len;
00058 int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
00059 float src0, src1, src2, src3, src4, src5, src6, src7;
00060
00061
00062
00063
00064 __asm__ volatile(
00065 "beq %[len], $zero, fti16_end%= \n\t"
00066 "fti16_lp%=: \n\t"
00067 "lwc1 %[src0], 0(%[src]) \n\t"
00068 "lwc1 %[src1], 4(%[src]) \n\t"
00069 "lwc1 %[src2], 8(%[src]) \n\t"
00070 "lwc1 %[src3], 12(%[src]) \n\t"
00071 "cvt.w.s %[src0], %[src0] \n\t"
00072 "cvt.w.s %[src1], %[src1] \n\t"
00073 "cvt.w.s %[src2], %[src2] \n\t"
00074 "cvt.w.s %[src3], %[src3] \n\t"
00075 "mfc1 %[ret0], %[src0] \n\t"
00076 "mfc1 %[ret1], %[src1] \n\t"
00077 "mfc1 %[ret2], %[src2] \n\t"
00078 "mfc1 %[ret3], %[src3] \n\t"
00079 "lwc1 %[src4], 16(%[src]) \n\t"
00080 "lwc1 %[src5], 20(%[src]) \n\t"
00081 "lwc1 %[src6], 24(%[src]) \n\t"
00082 "lwc1 %[src7], 28(%[src]) \n\t"
00083 "cvt.w.s %[src4], %[src4] \n\t"
00084 "cvt.w.s %[src5], %[src5] \n\t"
00085 "cvt.w.s %[src6], %[src6] \n\t"
00086 "cvt.w.s %[src7], %[src7] \n\t"
00087 "addiu %[src], 32 \n\t"
00088 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00089 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00090 "shll_s.w %[ret2], %[ret2], 16 \n\t"
00091 "shll_s.w %[ret3], %[ret3], 16 \n\t"
00092 "srl %[ret0], %[ret0], 16 \n\t"
00093 "srl %[ret1], %[ret1], 16 \n\t"
00094 "srl %[ret2], %[ret2], 16 \n\t"
00095 "srl %[ret3], %[ret3], 16 \n\t"
00096 "sh %[ret0], 0(%[dst]) \n\t"
00097 "sh %[ret1], 2(%[dst]) \n\t"
00098 "sh %[ret2], 4(%[dst]) \n\t"
00099 "sh %[ret3], 6(%[dst]) \n\t"
00100 "mfc1 %[ret4], %[src4] \n\t"
00101 "mfc1 %[ret5], %[src5] \n\t"
00102 "mfc1 %[ret6], %[src6] \n\t"
00103 "mfc1 %[ret7], %[src7] \n\t"
00104 "shll_s.w %[ret4], %[ret4], 16 \n\t"
00105 "shll_s.w %[ret5], %[ret5], 16 \n\t"
00106 "shll_s.w %[ret6], %[ret6], 16 \n\t"
00107 "shll_s.w %[ret7], %[ret7], 16 \n\t"
00108 "srl %[ret4], %[ret4], 16 \n\t"
00109 "srl %[ret5], %[ret5], 16 \n\t"
00110 "srl %[ret6], %[ret6], 16 \n\t"
00111 "srl %[ret7], %[ret7], 16 \n\t"
00112 "sh %[ret4], 8(%[dst]) \n\t"
00113 "sh %[ret5], 10(%[dst]) \n\t"
00114 "sh %[ret6], 12(%[dst]) \n\t"
00115 "sh %[ret7], 14(%[dst]) \n\t"
00116 "addiu %[dst], 16 \n\t"
00117 "bne %[src], %[src_end], fti16_lp%= \n\t"
00118 "fti16_end%=: \n\t"
00119 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
00120 [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
00121 [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
00122 [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
00123 [src]"+r"(src), [dst]"+r"(dst)
00124 : [src_end]"r"(src_end), [len]"r"(len)
00125 : "memory"
00126 );
00127 }
00128
00129 static void float_to_int16_interleave_mips(int16_t *dst, const float **src, long len,
00130 int channels)
00131 {
00132 int c, ch2 = channels <<1;
00133 int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
00134 float src0, src1, src2, src3, src4, src5, src6, src7;
00135 int16_t *dst_ptr0, *dst_ptr1, *dst_ptr2, *dst_ptr3;
00136 int16_t *dst_ptr4, *dst_ptr5, *dst_ptr6, *dst_ptr7;
00137 const float *src_ptr, *src_ptr2, *src_end;
00138
00139 if (channels == 2) {
00140 src_ptr = &src[0][0];
00141 src_ptr2 = &src[1][0];
00142 src_end = src_ptr + len;
00143
00144 __asm__ volatile (
00145 "fti16i2_lp%=: \n\t"
00146 "lwc1 %[src0], 0(%[src_ptr]) \n\t"
00147 "lwc1 %[src1], 0(%[src_ptr2]) \n\t"
00148 "addiu %[src_ptr], 4 \n\t"
00149 "cvt.w.s $f9, %[src0] \n\t"
00150 "cvt.w.s $f10, %[src1] \n\t"
00151 "mfc1 %[ret0], $f9 \n\t"
00152 "mfc1 %[ret1], $f10 \n\t"
00153 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00154 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00155 "addiu %[src_ptr2], 4 \n\t"
00156 "srl %[ret0], %[ret0], 16 \n\t"
00157 "srl %[ret1], %[ret1], 16 \n\t"
00158 "sh %[ret0], 0(%[dst]) \n\t"
00159 "sh %[ret1], 2(%[dst]) \n\t"
00160 "addiu %[dst], 4 \n\t"
00161 "bne %[src_ptr], %[src_end], fti16i2_lp%= \n\t"
00162 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1),
00163 [src0]"=&f"(src0), [src1]"=&f"(src1),
00164 [src_ptr]"+r"(src_ptr), [src_ptr2]"+r"(src_ptr2),
00165 [dst]"+r"(dst)
00166 : [src_end]"r"(src_end)
00167 : "memory"
00168 );
00169 } else {
00170 for (c = 0; c < channels; c++) {
00171 src_ptr = &src[c][0];
00172 dst_ptr0 = &dst[c];
00173 src_end = src_ptr + len;
00174
00175
00176
00177 __asm__ volatile(
00178 "fti16i_lp%=: \n\t"
00179 "lwc1 %[src0], 0(%[src_ptr]) \n\t"
00180 "lwc1 %[src1], 4(%[src_ptr]) \n\t"
00181 "lwc1 %[src2], 8(%[src_ptr]) \n\t"
00182 "lwc1 %[src3], 12(%[src_ptr]) \n\t"
00183 "cvt.w.s %[src0], %[src0] \n\t"
00184 "cvt.w.s %[src1], %[src1] \n\t"
00185 "cvt.w.s %[src2], %[src2] \n\t"
00186 "cvt.w.s %[src3], %[src3] \n\t"
00187 "mfc1 %[ret0], %[src0] \n\t"
00188 "mfc1 %[ret1], %[src1] \n\t"
00189 "mfc1 %[ret2], %[src2] \n\t"
00190 "mfc1 %[ret3], %[src3] \n\t"
00191 "lwc1 %[src4], 16(%[src_ptr]) \n\t"
00192 "lwc1 %[src5], 20(%[src_ptr]) \n\t"
00193 "lwc1 %[src6], 24(%[src_ptr]) \n\t"
00194 "lwc1 %[src7], 28(%[src_ptr]) \n\t"
00195 "addu %[dst_ptr1], %[dst_ptr0], %[ch2] \n\t"
00196 "addu %[dst_ptr2], %[dst_ptr1], %[ch2] \n\t"
00197 "addu %[dst_ptr3], %[dst_ptr2], %[ch2] \n\t"
00198 "addu %[dst_ptr4], %[dst_ptr3], %[ch2] \n\t"
00199 "addu %[dst_ptr5], %[dst_ptr4], %[ch2] \n\t"
00200 "addu %[dst_ptr6], %[dst_ptr5], %[ch2] \n\t"
00201 "addu %[dst_ptr7], %[dst_ptr6], %[ch2] \n\t"
00202 "addiu %[src_ptr], 32 \n\t"
00203 "cvt.w.s %[src4], %[src4] \n\t"
00204 "cvt.w.s %[src5], %[src5] \n\t"
00205 "cvt.w.s %[src6], %[src6] \n\t"
00206 "cvt.w.s %[src7], %[src7] \n\t"
00207 "shll_s.w %[ret0], %[ret0], 16 \n\t"
00208 "shll_s.w %[ret1], %[ret1], 16 \n\t"
00209 "shll_s.w %[ret2], %[ret2], 16 \n\t"
00210 "shll_s.w %[ret3], %[ret3], 16 \n\t"
00211 "srl %[ret0], %[ret0], 16 \n\t"
00212 "srl %[ret1], %[ret1], 16 \n\t"
00213 "srl %[ret2], %[ret2], 16 \n\t"
00214 "srl %[ret3], %[ret3], 16 \n\t"
00215 "sh %[ret0], 0(%[dst_ptr0]) \n\t"
00216 "sh %[ret1], 0(%[dst_ptr1]) \n\t"
00217 "sh %[ret2], 0(%[dst_ptr2]) \n\t"
00218 "sh %[ret3], 0(%[dst_ptr3]) \n\t"
00219 "mfc1 %[ret4], %[src4] \n\t"
00220 "mfc1 %[ret5], %[src5] \n\t"
00221 "mfc1 %[ret6], %[src6] \n\t"
00222 "mfc1 %[ret7], %[src7] \n\t"
00223 "shll_s.w %[ret4], %[ret4], 16 \n\t"
00224 "shll_s.w %[ret5], %[ret5], 16 \n\t"
00225 "shll_s.w %[ret6], %[ret6], 16 \n\t"
00226 "shll_s.w %[ret7], %[ret7], 16 \n\t"
00227 "srl %[ret4], %[ret4], 16 \n\t"
00228 "srl %[ret5], %[ret5], 16 \n\t"
00229 "srl %[ret6], %[ret6], 16 \n\t"
00230 "srl %[ret7], %[ret7], 16 \n\t"
00231 "sh %[ret4], 0(%[dst_ptr4]) \n\t"
00232 "sh %[ret5], 0(%[dst_ptr5]) \n\t"
00233 "sh %[ret6], 0(%[dst_ptr6]) \n\t"
00234 "sh %[ret7], 0(%[dst_ptr7]) \n\t"
00235 "addu %[dst_ptr0], %[dst_ptr7], %[ch2] \n\t"
00236 "bne %[src_ptr], %[src_end], fti16i_lp%= \n\t"
00237 : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
00238 [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
00239 [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
00240 [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
00241 [dst_ptr1]"=&r"(dst_ptr1), [dst_ptr2]"=&r"(dst_ptr2), [dst_ptr3]"=&r"(dst_ptr3),
00242 [dst_ptr4]"=&r"(dst_ptr4), [dst_ptr5]"=&r"(dst_ptr5), [dst_ptr6]"=&r"(dst_ptr6),
00243 [dst_ptr7]"=&r"(dst_ptr7), [dst_ptr0]"+r"(dst_ptr0), [src_ptr]"+r"(src_ptr)
00244 : [ch2]"r"(ch2), [src_end]"r"(src_end)
00245 : "memory"
00246 );
00247 }
00248 }
00249 }
00250 #endif
00251
00252 static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
00253 float mul, int len)
00254 {
00255
00256
00257
00258 float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
00259
00260 int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
00261 const int *src_end = src + len;
00262
00263
00264
00265 __asm__ volatile (
00266 "i32tf_lp%=: \n\t"
00267 "lw %[rpom11], 0(%[src]) \n\t"
00268 "lw %[rpom21], 4(%[src]) \n\t"
00269 "lw %[rpom1], 8(%[src]) \n\t"
00270 "lw %[rpom2], 12(%[src]) \n\t"
00271 "mtc1 %[rpom11], %[temp1] \n\t"
00272 "mtc1 %[rpom21], %[temp3] \n\t"
00273 "mtc1 %[rpom1], %[temp5] \n\t"
00274 "mtc1 %[rpom2], %[temp7] \n\t"
00275
00276 "lw %[rpom13], 16(%[src]) \n\t"
00277 "lw %[rpom23], 20(%[src]) \n\t"
00278 "lw %[rpom12], 24(%[src]) \n\t"
00279 "lw %[rpom22], 28(%[src]) \n\t"
00280 "mtc1 %[rpom13], %[temp9] \n\t"
00281 "mtc1 %[rpom23], %[temp11] \n\t"
00282 "mtc1 %[rpom12], %[temp13] \n\t"
00283 "mtc1 %[rpom22], %[temp15] \n\t"
00284
00285 "addiu %[src], 32 \n\t"
00286 "cvt.s.w %[temp1], %[temp1] \n\t"
00287 "cvt.s.w %[temp3], %[temp3] \n\t"
00288 "cvt.s.w %[temp5], %[temp5] \n\t"
00289 "cvt.s.w %[temp7], %[temp7] \n\t"
00290
00291 "cvt.s.w %[temp9], %[temp9] \n\t"
00292 "cvt.s.w %[temp11], %[temp11] \n\t"
00293 "cvt.s.w %[temp13], %[temp13] \n\t"
00294 "cvt.s.w %[temp15], %[temp15] \n\t"
00295
00296 "mul.s %[temp1], %[temp1], %[mul] \n\t"
00297 "mul.s %[temp3], %[temp3], %[mul] \n\t"
00298 "mul.s %[temp5], %[temp5], %[mul] \n\t"
00299 "mul.s %[temp7], %[temp7], %[mul] \n\t"
00300
00301 "mul.s %[temp9], %[temp9], %[mul] \n\t"
00302 "mul.s %[temp11], %[temp11], %[mul] \n\t"
00303 "mul.s %[temp13], %[temp13], %[mul] \n\t"
00304 "mul.s %[temp15], %[temp15], %[mul] \n\t"
00305
00306 "swc1 %[temp1], 0(%[dst]) \n\t"
00307 "swc1 %[temp3], 4(%[dst]) \n\t"
00308 "swc1 %[temp5], 8(%[dst]) \n\t"
00309 "swc1 %[temp7], 12(%[dst]) \n\t"
00310
00311 "swc1 %[temp9], 16(%[dst]) \n\t"
00312 "swc1 %[temp11], 20(%[dst]) \n\t"
00313 "swc1 %[temp13], 24(%[dst]) \n\t"
00314 "swc1 %[temp15], 28(%[dst]) \n\t"
00315 "addiu %[dst], 32 \n\t"
00316 "bne %[src], %[src_end], i32tf_lp%= \n\t"
00317 : [temp1]"=&f"(temp1), [temp11]"=&f"(temp11),
00318 [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
00319 [temp3]"=&f"(temp3), [temp5]"=&f"(temp5),
00320 [temp7]"=&f"(temp7), [temp9]"=&f"(temp9),
00321 [rpom1]"=&r"(rpom1), [rpom2]"=&r"(rpom2),
00322 [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
00323 [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
00324 [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
00325 [dst]"+r"(dst), [src]"+r"(src)
00326 : [mul]"f"(mul), [src_end]"r"(src_end)
00327 : "memory"
00328 );
00329 }
00330
00331 av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c)
00332 {
00333 #if HAVE_MIPSDSPR1
00334 c->float_to_int16_interleave = float_to_int16_interleave_mips;
00335 c->float_to_int16 = float_to_int16_mips;
00336 #endif
00337 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
00338 }