/*
 * Copyright (c) 2012
 * MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Authors:  Djordje Pesut   (djordje@mips.com)
 *           Mirjana Vulin   (mvulin@mips.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Reference: libavcodec/aacsbr.c
 */

#include "libavcodec/aac.h"
#include "libavcodec/aacsbr.h"
#include "libavutil/mem_internal.h"
#include "libavutil/mips/asmdefs.h"

#define ENVELOPE_ADJUSTMENT_OFFSET 2

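/*
 * The helpers below are MIPS inline-assembly versions of the generic SBR
 * routines referenced above; they keep the same data layout and loop
 * structure as the C code and are hooked into the AACSBRContext function
 * pointers by ff_aacsbr_func_ptr_init_mips() at the bottom of this file.
 */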
#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
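/* Low-frequency band generation: clear X_low, copy the current frame's
 * analysis QMF output W[buf_idx] into time slots 8..39 for the first kx[1]
 * subbands, and the last eight slots of the previous frame's W into slots
 * 0..7 for the first kx[0] subbands. The samples are only moved, so integer
 * lw/sw pairs are used for the float data. */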
static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
                           float X_low[32][40][2], const float W[2][32][32][2],
                           int buf_idx)
{
    int i, k;
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
    float *p_x_low = &X_low[0][8][0];
    float *p_w = (float*)&W[buf_idx][0][0][0];
    float *p_x1_low = &X_low[0][0][0];
    float *p_w1 = (float*)&W[1-buf_idx][24][0][0];

    float *loop_end = p_x1_low + 2560;

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                                           \n\t"
        "sw       $0,           0(%[p_x1_low])        \n\t"
        "sw       $0,           4(%[p_x1_low])        \n\t"
        "sw       $0,           8(%[p_x1_low])        \n\t"
        "sw       $0,           12(%[p_x1_low])       \n\t"
        "sw       $0,           16(%[p_x1_low])       \n\t"
        "sw       $0,           20(%[p_x1_low])       \n\t"
        "sw       $0,           24(%[p_x1_low])       \n\t"
        "sw       $0,           28(%[p_x1_low])       \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low],  32      \n\t"
        "bne      %[p_x1_low],  %[loop_end],  1b      \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low],  -10240  \n\t"

        : [p_x1_low]"+r"(p_x1_low)
        : [loop_end]"r"(loop_end)
        : "memory"
    );

    for (k = 0; k < sbr->kx[1]; k++) {
        for (i = 0; i < 32; i += 4) {
            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw       %[temp0],    0(%[p_w])          \n\t"
                "lw       %[temp1],    4(%[p_w])          \n\t"
                "lw       %[temp2],    256(%[p_w])        \n\t"
                "lw       %[temp3],    260(%[p_w])        \n\t"
                "lw       %[temp4],    512(%[p_w])        \n\t"
                "lw       %[temp5],    516(%[p_w])        \n\t"
                "lw       %[temp6],    768(%[p_w])        \n\t"
                "lw       %[temp7],    772(%[p_w])        \n\t"
                "sw       %[temp0],    0(%[p_x_low])      \n\t"
                "sw       %[temp1],    4(%[p_x_low])      \n\t"
                "sw       %[temp2],    8(%[p_x_low])      \n\t"
                "sw       %[temp3],    12(%[p_x_low])     \n\t"
                "sw       %[temp4],    16(%[p_x_low])     \n\t"
                "sw       %[temp5],    20(%[p_x_low])     \n\t"
                "sw       %[temp6],    24(%[p_x_low])     \n\t"
                "sw       %[temp7],    28(%[p_x_low])     \n\t"
                PTR_ADDIU "%[p_x_low], %[p_x_low],  32    \n\t"
                PTR_ADDIU "%[p_w],     %[p_w],      1024  \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
                :
                : "memory"
            );
        }
        p_x_low += 16;
        p_w -= 2046;
    }

    for (k = 0; k < sbr->kx[0]; k++) {
        for (i = 0; i < 2; i++) {

            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw       %[temp0],     0(%[p_w1])         \n\t"
                "lw       %[temp1],     4(%[p_w1])         \n\t"
                "lw       %[temp2],     256(%[p_w1])       \n\t"
                "lw       %[temp3],     260(%[p_w1])       \n\t"
                "lw       %[temp4],     512(%[p_w1])       \n\t"
                "lw       %[temp5],     516(%[p_w1])       \n\t"
                "lw       %[temp6],     768(%[p_w1])       \n\t"
                "lw       %[temp7],     772(%[p_w1])       \n\t"
                "sw       %[temp0],     0(%[p_x1_low])     \n\t"
                "sw       %[temp1],     4(%[p_x1_low])     \n\t"
                "sw       %[temp2],     8(%[p_x1_low])     \n\t"
                "sw       %[temp3],     12(%[p_x1_low])    \n\t"
                "sw       %[temp4],     16(%[p_x1_low])    \n\t"
                "sw       %[temp5],     20(%[p_x1_low])    \n\t"
                "sw       %[temp6],     24(%[p_x1_low])    \n\t"
                "sw       %[temp7],     28(%[p_x1_low])    \n\t"
                PTR_ADDIU "%[p_x1_low], %[p_x1_low],  32   \n\t"
                PTR_ADDIU "%[p_w1],     %[p_w1],      1024 \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
                :
                : "memory"
            );
        }
        p_x1_low += 64;
        p_w1 -= 510;
    }
    return 0;
}

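/* Assemble the output buffer X for one channel: clear it, then fill the
 * first kx subbands from the delayed low-band signal X_low and the remaining
 * m subbands from the reconstructed high-band samples in Y0/Y1. The real and
 * imaginary parts are split into the two planes of X (the second plane sits
 * 9728 bytes = 38*64 floats after the first). */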
static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
                          const float Y0[38][64][2], const float Y1[38][64][2],
                          const float X_low[32][40][2], int ch)
{
    int k, i;
    const int i_f = 32;
    int temp0, temp1, temp2, temp3;
    const float *X_low1, *Y01, *Y11;
    float *x1 = &X[0][0][0];
    float *j = x1 + 4864;
    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                                    \n\t"
        "sw       $0,      0(%[x1])            \n\t"
        "sw       $0,      4(%[x1])            \n\t"
        "sw       $0,      8(%[x1])            \n\t"
        "sw       $0,      12(%[x1])           \n\t"
        "sw       $0,      16(%[x1])           \n\t"
        "sw       $0,      20(%[x1])           \n\t"
        "sw       $0,      24(%[x1])           \n\t"
        "sw       $0,      28(%[x1])           \n\t"
        PTR_ADDIU "%[x1],  %[x1],  32          \n\t"
        "bne      %[x1],   %[j],   1b          \n\t"
        PTR_ADDIU "%[x1],  %[x1],  -19456      \n\t"

        : [x1]"+r"(x1)
        : [j]"r"(j)
        : "memory"
    );

    if (i_Temp != 0) {

        X_low1 = &X_low[0][2][0];

        for (k = 0; k < sbr->kx[0]; k++) {

            __asm__ volatile (
                "move     %[i],       $zero              \n\t"
                "2:                                      \n\t"
                "lw       %[temp0],   0(%[X_low1])       \n\t"
                "lw       %[temp1],   4(%[X_low1])       \n\t"
                "sw       %[temp0],   0(%[x1])           \n\t"
                "sw       %[temp1],   9728(%[x1])        \n\t"
                PTR_ADDIU "%[x1],     %[x1],      256    \n\t"
                PTR_ADDIU "%[X_low1], %[X_low1],  8      \n\t"
                "addiu    %[i],       %[i],       1      \n\t"
                "bne      %[i],       %[i_Temp],  2b     \n\t"

                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1 -= (i_Temp << 6) - 1;
            X_low1 -= (i_Temp << 1) - 80;
        }

        x1 = &X[0][0][k];
        Y01 = (float*)&Y0[32][k][0];

        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
            __asm__ volatile (
                "move     %[i],      $zero               \n\t"
                "3:                                      \n\t"
                "lw       %[temp0],  0(%[Y01])           \n\t"
                "lw       %[temp1],  4(%[Y01])           \n\t"
                "sw       %[temp0],  0(%[x1])            \n\t"
                "sw       %[temp1],  9728(%[x1])         \n\t"
                PTR_ADDIU "%[x1],    %[x1],      256     \n\t"
                PTR_ADDIU "%[Y01],   %[Y01],     512     \n\t"
                "addiu    %[i],      %[i],       1       \n\t"
                "bne      %[i],      %[i_Temp],  3b      \n\t"

                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1 -= (i_Temp << 6) - 1;
            Y01 -= (i_Temp << 7) - 2;
        }
    }

    x1 = &X[0][i_Temp][0];
    X_low1 = &X_low[0][i_Temp + 2][0];
    temp3 = 38;

    for (k = 0; k < sbr->kx[1]; k++) {

        __asm__ volatile (
            "move     %[i],       %[i_Temp]          \n\t"
            "4:                                      \n\t"
            "lw       %[temp0],   0(%[X_low1])       \n\t"
            "lw       %[temp1],   4(%[X_low1])       \n\t"
            "sw       %[temp0],   0(%[x1])           \n\t"
            "sw       %[temp1],   9728(%[x1])        \n\t"
            PTR_ADDIU "%[x1],     %[x1],      256    \n\t"
            PTR_ADDIU "%[X_low1], %[X_low1],  8      \n\t"
            "addiu    %[i],       %[i],       1      \n\t"
            "bne      %[i],       %[temp3],   4b     \n\t"

            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
              [temp2]"=&r"(temp2)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
            : "memory"
        );
        x1 -= ((38 - i_Temp) << 6) - 1;
        X_low1 -= ((38 - i_Temp) << 1) - 80;
    }

    x1 = &X[0][i_Temp][k];
    Y11 = &Y1[i_Temp][k][0];
    temp2 = 32;

    for (; k < sbr->kx[1] + sbr->m[1]; k++) {

        __asm__ volatile (
            "move     %[i],      %[i_Temp]           \n\t"
            "5:                                      \n\t"
            "lw       %[temp0],  0(%[Y11])           \n\t"
            "lw       %[temp1],  4(%[Y11])           \n\t"
            "sw       %[temp0],  0(%[x1])            \n\t"
            "sw       %[temp1],  9728(%[x1])         \n\t"
            PTR_ADDIU "%[x1],    %[x1],      256     \n\t"
            PTR_ADDIU "%[Y11],   %[Y11],     512     \n\t"
            "addiu    %[i],      %[i],       1       \n\t"
            "bne      %[i],      %[temp2],   5b      \n\t"

            : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
              [temp2]"r"(temp2)
            : "memory"
        );

        x1 -= ((32 - i_Temp) << 6) - 1;
        Y11 -= ((32 - i_Temp) << 7) - 2;
    }
    return 0;
}

#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
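/* The two routines below use the four-operand madd.s/nmsub.s forms, which
 * are not available on MIPS R6, hence the pre-R6 guard above. */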
static void sbr_hf_assemble_mips(float Y1[38][64][2],
                                 const float X_high[64][40][2],
                                 SpectralBandReplication *sbr, SBRData *ch_data,
                                 const int e_a[2])
{
    int e, i, j, m;
    const int h_SL = 4 * !sbr->bs_smoothing_mode;
    const int kx = sbr->kx[1];
    const int m_max = sbr->m[1];
    static const float h_smooth[5] = {
        0.33333333333333,
        0.30150283239582,
        0.21816949906249,
        0.11516383427084,
        0.03183050093751,
    };

    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
    int indexnoise = ch_data->f_indexnoise;
    int indexsine = ch_data->f_indexsine;
    float *g_temp1, *q_temp1, *pok, *pok1;
    uint32_t temp1, temp2, temp3, temp4;
    int size = m_max;

    if (sbr->reset) {
        for (i = 0; i < h_SL; i++) {
            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
        }
    } else if (h_SL) {
        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            g_temp1 = g_temp[h_SL + i];
            pok = sbr->gain[e];
            q_temp1 = q_temp[h_SL + i];
            pok1 = sbr->q_m[e];

            /* loop unrolled 4 times */
            for (j = 0; j < (size >> 2); j++) {
                __asm__ volatile (
                    "lw       %[temp1],    0(%[pok])         \n\t"
                    "lw       %[temp2],    4(%[pok])         \n\t"
                    "lw       %[temp3],    8(%[pok])         \n\t"
                    "lw       %[temp4],    12(%[pok])        \n\t"
                    "sw       %[temp1],    0(%[g_temp1])     \n\t"
                    "sw       %[temp2],    4(%[g_temp1])     \n\t"
                    "sw       %[temp3],    8(%[g_temp1])     \n\t"
                    "sw       %[temp4],    12(%[g_temp1])    \n\t"
                    "lw       %[temp1],    0(%[pok1])        \n\t"
                    "lw       %[temp2],    4(%[pok1])        \n\t"
                    "lw       %[temp3],    8(%[pok1])        \n\t"
                    "lw       %[temp4],    12(%[pok1])       \n\t"
                    "sw       %[temp1],    0(%[q_temp1])     \n\t"
                    "sw       %[temp2],    4(%[q_temp1])     \n\t"
                    "sw       %[temp3],    8(%[q_temp1])     \n\t"
                    "sw       %[temp4],    12(%[q_temp1])    \n\t"
                    PTR_ADDIU "%[pok],     %[pok],      16   \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1],  16   \n\t"
                    PTR_ADDIU "%[pok1],    %[pok1],     16   \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1],  16   \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }

            for (j = 0; j < (size & 3); j++) {
                __asm__ volatile (
                    "lw       %[temp1],    0(%[pok])         \n\t"
                    "lw       %[temp2],    0(%[pok1])        \n\t"
                    "sw       %[temp1],    0(%[g_temp1])     \n\t"
                    "sw       %[temp2],    0(%[q_temp1])     \n\t"
                    PTR_ADDIU "%[pok],     %[pok],      4    \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1],  4    \n\t"
                    PTR_ADDIU "%[pok1],    %[pok1],     4    \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1],  4    \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }
        }
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
            float *g_filt, *q_filt;

            if (h_SL && e != e_a[0] && e != e_a[1]) {
                g_filt = g_filt_tab;
                q_filt = q_filt_tab;

                for (m = 0; m < m_max; m++) {
                    const int idx1 = i + h_SL;
                    g_filt[m] = 0.0f;
                    q_filt[m] = 0.0f;

                    for (j = 0; j <= h_SL; j++) {
                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
                    }
                }
            } else {
                g_filt = g_temp[i + h_SL];
                q_filt = q_temp[i];
            }

            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
                               i + ENVELOPE_ADJUSTMENT_OFFSET);

            if (e != e_a[0] && e != e_a[1]) {
                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
                                                   q_filt, indexnoise,
                                                   kx, m_max);
            } else {
                int idx = indexsine & 1;
                int A = (1 - ((indexsine + (kx & 1)) & 2));
                int B = (A ^ (-idx)) + idx;
                float *out = &Y1[i][kx][idx];
                float *in = sbr->s_m[e];
                float temp0, temp1, temp2, temp3, temp4, temp5;
                float A_f = (float)A;
                float B_f = (float)B;

                for (m = 0; m + 1 < m_max; m += 2) {

                    temp2 = out[0];
                    temp3 = out[2];

                    __asm__ volatile (
                        "lwc1     %[temp0],  0(%[in])                        \n\t"
                        "lwc1     %[temp1],  4(%[in])                        \n\t"
                        "madd.s   %[temp4],  %[temp2],  %[temp0],  %[A_f]    \n\t"
                        "madd.s   %[temp5],  %[temp3],  %[temp1],  %[B_f]    \n\t"
                        "swc1     %[temp4],  0(%[out])                       \n\t"
                        "swc1     %[temp5],  8(%[out])                       \n\t"
                        PTR_ADDIU "%[in],    %[in],     8                    \n\t"
                        PTR_ADDIU "%[out],   %[out],    16                   \n\t"

                        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
                          [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
                          [in]"+r"(in), [out]"+r"(out)
                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
                          [temp3]"f"(temp3)
                        : "memory"
                    );
                }
                if (m_max & 1)
                    out[2*m] += in[m] * A;
            }
            indexnoise = (indexnoise + m_max) & 0x1ff;
            indexsine = (indexsine + 1) & 3;
        }
    }
    ch_data->f_indexnoise = indexnoise;
    ch_data->f_indexsine = indexsine;
}

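/* Per-subband LPC for the high-frequency generator: from the autocorrelation
 * phi of X_low, solve for the two complex prediction coefficients alpha1 and
 * alpha0, zeroing them when the determinant dk (or phi[1][0][0]) is zero or
 * when either coefficient's squared magnitude reaches 16. */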
static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
                                       float (*alpha0)[2], float (*alpha1)[2],
                                       const float X_low[32][40][2], int k0)
{
    int k;
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;

    c = 1.000001f;

    for (k = 0; k < k0; k++) {
        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
        float dk;
        phi1 = &phi[0][0][0];
        alpha_1 = &alpha1[k][0];
        alpha_0 = &alpha0[k][0];
        dsp->autocorrelate(X_low[k], phi);

        __asm__ volatile (
            "lwc1     %[temp0],  40(%[phi1])                     \n\t"
            "lwc1     %[temp1],  16(%[phi1])                     \n\t"
            "lwc1     %[temp2],  24(%[phi1])                     \n\t"
            "lwc1     %[temp3],  28(%[phi1])                     \n\t"
            "mul.s    %[dk],     %[temp0],  %[temp1]             \n\t"
            "lwc1     %[temp4],  0(%[phi1])                      \n\t"
            "mul.s    %[res2],   %[temp2],  %[temp2]             \n\t"
            "lwc1     %[temp5],  4(%[phi1])                      \n\t"
            "madd.s   %[res2],   %[res2],   %[temp3],  %[temp3]  \n\t"
            "lwc1     %[temp6],  8(%[phi1])                      \n\t"
            "div.s    %[res2],   %[res2],   %[c]                 \n\t"
            "lwc1     %[temp0],  12(%[phi1])                     \n\t"
            "sub.s    %[dk],     %[dk],     %[res2]              \n\t"

            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
            : [phi1]"r"(phi1), [c]"f"(c)
            : "memory"
        );

        if (!dk) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
        } else {
            __asm__ volatile (
                "mul.s    %[temp_real], %[temp4],      %[temp2]             \n\t"
                "nmsub.s  %[temp_real], %[temp_real],  %[temp5],  %[temp3]  \n\t"
                "nmsub.s  %[temp_real], %[temp_real],  %[temp6],  %[temp1]  \n\t"
                "mul.s    %[temp_im],   %[temp4],      %[temp3]             \n\t"
                "madd.s   %[temp_im],   %[temp_im],    %[temp5],  %[temp2]  \n\t"
                "nmsub.s  %[temp_im],   %[temp_im],    %[temp0],  %[temp1]  \n\t"
                "div.s    %[temp_real], %[temp_real],  %[dk]                \n\t"
                "div.s    %[temp_im],   %[temp_im],    %[dk]                \n\t"
                "swc1     %[temp_real], 0(%[alpha_1])                       \n\t"
                "swc1     %[temp_im],   4(%[alpha_1])                       \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im)
                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
                  [temp5]"f"(temp5), [temp6]"f"(temp6),
                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
                : "memory"
            );
        }

        if (!phi1[4]) {
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        } else {
            __asm__ volatile (
                "lwc1     %[temp6],     0(%[alpha_1])                       \n\t"
                "lwc1     %[temp7],     4(%[alpha_1])                       \n\t"
                "mul.s    %[temp_real], %[temp6],      %[temp2]             \n\t"
                "add.s    %[temp_real], %[temp_real],  %[temp4]             \n\t"
                "madd.s   %[temp_real], %[temp_real],  %[temp7],  %[temp3]  \n\t"
                "mul.s    %[temp_im],   %[temp7],      %[temp2]             \n\t"
                "add.s    %[temp_im],   %[temp_im],    %[temp5]             \n\t"
                "nmsub.s  %[temp_im],   %[temp_im],    %[temp6],  %[temp3]  \n\t"
                "div.s    %[temp_real], %[temp_real],  %[temp1]             \n\t"
                "div.s    %[temp_im],   %[temp_im],    %[temp1]             \n\t"
                "neg.s    %[temp_real], %[temp_real]                        \n\t"
                "neg.s    %[temp_im],   %[temp_im]                          \n\t"
                "swc1     %[temp_real], 0(%[alpha_0])                       \n\t"
                "swc1     %[temp_im],   4(%[alpha_0])                       \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
                  [res1]"=&f"(res1), [res2]"=&f"(res2)
                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
                : "memory"
            );
        }

        __asm__ volatile (
            "lwc1     %[temp1],     0(%[alpha_1])                           \n\t"
            "lwc1     %[temp2],     4(%[alpha_1])                           \n\t"
            "lwc1     %[temp_real], 0(%[alpha_0])                           \n\t"
            "lwc1     %[temp_im],   4(%[alpha_0])                           \n\t"
            "mul.s    %[res1],      %[temp1],       %[temp1]                \n\t"
            "madd.s   %[res1],      %[res1],        %[temp2],    %[temp2]   \n\t"
            "mul.s    %[res2],      %[temp_real],   %[temp_real]            \n\t"
            "madd.s   %[res2],      %[res2],        %[temp_im],  %[temp_im] \n\t"

            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [res1]"=&f"(res1), [res2]"=&f"(res2)
            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
            : "memory"
        );

        if (res1 >= 16.0f || res2 >= 16.0f) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        }
    }
}
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */

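/* Called from the generic AAC SBR code on MIPS builds; overrides the default
 * AACSBRContext function pointers with the optimized routines above (the
 * madd.s/nmsub.s based ones only on pre-R6 cores). */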
void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
{
#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
    c->sbr_lf_gen            = sbr_lf_gen_mips;
    c->sbr_x_gen             = sbr_x_gen_mips;
#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
    c->sbr_hf_assemble       = sbr_hf_assemble_mips;
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
}