FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aaccoder_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Stanislav Ocovaj (socovaj@mips.com)
30  * Szabolcs Pal (sabolc@mips.com)
31  *
32  * AAC coefficients encoder optimized for MIPS floating-point architecture
33  *
34  * This file is part of FFmpeg.
35  *
36  * FFmpeg is free software; you can redistribute it and/or
37  * modify it under the terms of the GNU Lesser General Public
38  * License as published by the Free Software Foundation; either
39  * version 2.1 of the License, or (at your option) any later version.
40  *
41  * FFmpeg is distributed in the hope that it will be useful,
42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44  * Lesser General Public License for more details.
45  *
46  * You should have received a copy of the GNU Lesser General Public
47  * License along with FFmpeg; if not, write to the Free Software
48  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
49  */
50 
51 /**
52  * @file
53  * Reference: libavcodec/aaccoder.c
54  */
55 
56 #include "libavutil/libm.h"
57 
58 #include <float.h>
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 #include "libavcodec/aacenctab.h"
67 
68 #if HAVE_INLINE_ASM
/**
 * One node of a band coding path.
 *
 * NOTE(review): the code that reads and writes this struct is not visible in
 * this part of the file; the per-field descriptions are inferred from the
 * field names only and should be confirmed.
 */
typedef struct BandCodingPath {
    int   prev_idx; /* presumably the index of the preceding path node */
    float cost;     /* presumably the cost accumulated up to this node */
    int   run;      /* presumably the length of the current run */
} BandCodingPath;
74 
/**
 * 81-entry (3^4) lookup table: entry i is the number of non-zero digits of i
 * when i is written with four base-3 digits.
 *
 * NOTE(review): the name suggests this is the number of sign bits that
 * accompany an unsigned quad codeword; the code using the table is not
 * visible here - confirm.
 */
static const uint8_t uquad_sign_bits[81] = {
    /* leading digit 0 */
    0, 1, 1,  1, 2, 2,  1, 2, 2,
    1, 2, 2,  2, 3, 3,  2, 3, 3,
    1, 2, 2,  2, 3, 3,  2, 3, 3,
    /* leading digit 1 */
    1, 2, 2,  2, 3, 3,  2, 3, 3,
    2, 3, 3,  3, 4, 4,  3, 4, 4,
    2, 3, 3,  3, 4, 4,  3, 4, 4,
    /* leading digit 2 */
    1, 2, 2,  2, 3, 3,  2, 3, 3,
    2, 3, 3,  3, 4, 4,  3, 4, 4,
    2, 3, 3,  3, 4, 4,  3, 4, 4
};
86 
/**
 * 64-entry (8 x 8) lookup table: entry 8*a + b is the number of non-zero
 * values among a and b (each in 0..7).
 *
 * NOTE(review): the name suggests this is the number of sign bits that
 * accompany an unsigned pair codeword with maximum value 7; the code using
 * the table is not visible here - confirm.
 */
static const uint8_t upair7_sign_bits[64] = {
    /* a = 0 */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* a = 1 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 2 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 3 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 4 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 5 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 6 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 7 */ 1, 2, 2, 2, 2, 2, 2, 2,
};
97 
/**
 * 169-entry (13 x 13) lookup table: entry 13*a + b is the number of non-zero
 * values among a and b (each in 0..12).
 *
 * NOTE(review): the name suggests this is the number of sign bits that
 * accompany an unsigned pair codeword with maximum value 12; the code using
 * the table is not visible here - confirm.
 */
static const uint8_t upair12_sign_bits[169] = {
    /* a =  0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a =  1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
113 
/**
 * 289-entry (17 x 17) lookup table: entry 17*a + b is the number of non-zero
 * values among a and b (each in 0..16).
 *
 * NOTE(review): the name suggests this is the number of sign bits that
 * accompany a codeword of the escape codebook; the code using the table is
 * not visible here - confirm.
 */
static const uint8_t esc_sign_bits[289] = {
    /* a =  0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a =  1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 13 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 14 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 15 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 16 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
133 
/**
 * Functions derived from the template function and optimized for quantizing
 * and encoding a band.
 */
137 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
138  PutBitContext *pb, const float *in, float *out,
139  const float *scaled, int size, int scale_idx,
140  int cb, const float lambda, const float uplim,
141  int *bits, float *energy, const float ROUNDING)
142 {
143  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
144  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
145  int i;
146  int qc1, qc2, qc3, qc4;
147  float qenergy = 0.0f;
148 
149  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
150  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
151  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
152 
153  abs_pow34_v(s->scoefs, in, size);
154  scaled = s->scoefs;
155  for (i = 0; i < size; i += 4) {
156  int curidx;
157  int *in_int = (int *)&in[i];
158  int t0, t1, t2, t3, t4, t5, t6, t7;
159  const float *vec;
160 
161  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
162  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
163  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
164  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
165 
166  __asm__ volatile (
167  ".set push \n\t"
168  ".set noreorder \n\t"
169 
170  "slt %[qc1], $zero, %[qc1] \n\t"
171  "slt %[qc2], $zero, %[qc2] \n\t"
172  "slt %[qc3], $zero, %[qc3] \n\t"
173  "slt %[qc4], $zero, %[qc4] \n\t"
174  "lw %[t0], 0(%[in_int]) \n\t"
175  "lw %[t1], 4(%[in_int]) \n\t"
176  "lw %[t2], 8(%[in_int]) \n\t"
177  "lw %[t3], 12(%[in_int]) \n\t"
178  "srl %[t0], %[t0], 31 \n\t"
179  "srl %[t1], %[t1], 31 \n\t"
180  "srl %[t2], %[t2], 31 \n\t"
181  "srl %[t3], %[t3], 31 \n\t"
182  "subu %[t4], $zero, %[qc1] \n\t"
183  "subu %[t5], $zero, %[qc2] \n\t"
184  "subu %[t6], $zero, %[qc3] \n\t"
185  "subu %[t7], $zero, %[qc4] \n\t"
186  "movn %[qc1], %[t4], %[t0] \n\t"
187  "movn %[qc2], %[t5], %[t1] \n\t"
188  "movn %[qc3], %[t6], %[t2] \n\t"
189  "movn %[qc4], %[t7], %[t3] \n\t"
190 
191  ".set pop \n\t"
192 
193  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
194  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
195  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
196  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
197  : [in_int]"r"(in_int)
198  : "memory"
199  );
200 
201  curidx = qc1;
202  curidx *= 3;
203  curidx += qc2;
204  curidx *= 3;
205  curidx += qc3;
206  curidx *= 3;
207  curidx += qc4;
208  curidx += 40;
209 
210  put_bits(pb, p_bits[curidx], p_codes[curidx]);
211 
212  if (out || energy) {
213  float e1,e2,e3,e4;
214  vec = &p_vec[curidx*4];
215  e1 = vec[0] * IQ;
216  e2 = vec[1] * IQ;
217  e3 = vec[2] * IQ;
218  e4 = vec[3] * IQ;
219  if (out) {
220  out[i+0] = e1;
221  out[i+1] = e2;
222  out[i+2] = e3;
223  out[i+3] = e4;
224  }
225  if (energy)
226  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
227  }
228  }
229  if (energy)
230  *energy = qenergy;
231 }
232 
233 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
234  PutBitContext *pb, const float *in, float *out,
235  const float *scaled, int size, int scale_idx,
236  int cb, const float lambda, const float uplim,
237  int *bits, float *energy, const float ROUNDING)
238 {
239  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
240  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
241  int i;
242  int qc1, qc2, qc3, qc4;
243  float qenergy = 0.0f;
244 
245  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
246  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
247  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
248 
249  abs_pow34_v(s->scoefs, in, size);
250  scaled = s->scoefs;
251  for (i = 0; i < size; i += 4) {
252  int curidx, sign, count;
253  int *in_int = (int *)&in[i];
254  uint8_t v_bits;
255  unsigned int v_codes;
256  int t0, t1, t2, t3, t4;
257  const float *vec;
258 
259  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
260  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
261  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
262  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
263 
264  __asm__ volatile (
265  ".set push \n\t"
266  ".set noreorder \n\t"
267 
268  "ori %[t4], $zero, 2 \n\t"
269  "ori %[sign], $zero, 0 \n\t"
270  "slt %[t0], %[t4], %[qc1] \n\t"
271  "slt %[t1], %[t4], %[qc2] \n\t"
272  "slt %[t2], %[t4], %[qc3] \n\t"
273  "slt %[t3], %[t4], %[qc4] \n\t"
274  "movn %[qc1], %[t4], %[t0] \n\t"
275  "movn %[qc2], %[t4], %[t1] \n\t"
276  "movn %[qc3], %[t4], %[t2] \n\t"
277  "movn %[qc4], %[t4], %[t3] \n\t"
278  "lw %[t0], 0(%[in_int]) \n\t"
279  "lw %[t1], 4(%[in_int]) \n\t"
280  "lw %[t2], 8(%[in_int]) \n\t"
281  "lw %[t3], 12(%[in_int]) \n\t"
282  "slt %[t0], %[t0], $zero \n\t"
283  "movn %[sign], %[t0], %[qc1] \n\t"
284  "slt %[t1], %[t1], $zero \n\t"
285  "slt %[t2], %[t2], $zero \n\t"
286  "slt %[t3], %[t3], $zero \n\t"
287  "sll %[t0], %[sign], 1 \n\t"
288  "or %[t0], %[t0], %[t1] \n\t"
289  "movn %[sign], %[t0], %[qc2] \n\t"
290  "slt %[t4], $zero, %[qc1] \n\t"
291  "slt %[t1], $zero, %[qc2] \n\t"
292  "slt %[count], $zero, %[qc3] \n\t"
293  "sll %[t0], %[sign], 1 \n\t"
294  "or %[t0], %[t0], %[t2] \n\t"
295  "movn %[sign], %[t0], %[qc3] \n\t"
296  "slt %[t2], $zero, %[qc4] \n\t"
297  "addu %[count], %[count], %[t4] \n\t"
298  "addu %[count], %[count], %[t1] \n\t"
299  "sll %[t0], %[sign], 1 \n\t"
300  "or %[t0], %[t0], %[t3] \n\t"
301  "movn %[sign], %[t0], %[qc4] \n\t"
302  "addu %[count], %[count], %[t2] \n\t"
303 
304  ".set pop \n\t"
305 
306  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
307  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
308  [sign]"=&r"(sign), [count]"=&r"(count),
309  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
310  [t4]"=&r"(t4)
311  : [in_int]"r"(in_int)
312  : "memory"
313  );
314 
315  curidx = qc1;
316  curidx *= 3;
317  curidx += qc2;
318  curidx *= 3;
319  curidx += qc3;
320  curidx *= 3;
321  curidx += qc4;
322 
323  v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
324  v_bits = p_bits[curidx] + count;
325  put_bits(pb, v_bits, v_codes);
326 
327  if (out || energy) {
328  float e1,e2,e3,e4;
329  vec = &p_vec[curidx*4];
330  e1 = copysignf(vec[0] * IQ, in[i+0]);
331  e2 = copysignf(vec[1] * IQ, in[i+1]);
332  e3 = copysignf(vec[2] * IQ, in[i+2]);
333  e4 = copysignf(vec[3] * IQ, in[i+3]);
334  if (out) {
335  out[i+0] = e1;
336  out[i+1] = e2;
337  out[i+2] = e3;
338  out[i+3] = e4;
339  }
340  if (energy)
341  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
342  }
343  }
344  if (energy)
345  *energy = qenergy;
346 }
347 
348 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
349  PutBitContext *pb, const float *in, float *out,
350  const float *scaled, int size, int scale_idx,
351  int cb, const float lambda, const float uplim,
352  int *bits, float *energy, const float ROUNDING)
353 {
354  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
355  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
356  int i;
357  int qc1, qc2, qc3, qc4;
358  float qenergy = 0.0f;
359 
360  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
361  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
362  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
363 
364  abs_pow34_v(s->scoefs, in, size);
365  scaled = s->scoefs;
366  for (i = 0; i < size; i += 4) {
367  int curidx, curidx2;
368  int *in_int = (int *)&in[i];
369  uint8_t v_bits;
370  unsigned int v_codes;
371  int t0, t1, t2, t3, t4, t5, t6, t7;
372  const float *vec1, *vec2;
373 
374  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
375  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
376  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
377  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
378 
379  __asm__ volatile (
380  ".set push \n\t"
381  ".set noreorder \n\t"
382 
383  "ori %[t4], $zero, 4 \n\t"
384  "slt %[t0], %[t4], %[qc1] \n\t"
385  "slt %[t1], %[t4], %[qc2] \n\t"
386  "slt %[t2], %[t4], %[qc3] \n\t"
387  "slt %[t3], %[t4], %[qc4] \n\t"
388  "movn %[qc1], %[t4], %[t0] \n\t"
389  "movn %[qc2], %[t4], %[t1] \n\t"
390  "movn %[qc3], %[t4], %[t2] \n\t"
391  "movn %[qc4], %[t4], %[t3] \n\t"
392  "lw %[t0], 0(%[in_int]) \n\t"
393  "lw %[t1], 4(%[in_int]) \n\t"
394  "lw %[t2], 8(%[in_int]) \n\t"
395  "lw %[t3], 12(%[in_int]) \n\t"
396  "srl %[t0], %[t0], 31 \n\t"
397  "srl %[t1], %[t1], 31 \n\t"
398  "srl %[t2], %[t2], 31 \n\t"
399  "srl %[t3], %[t3], 31 \n\t"
400  "subu %[t4], $zero, %[qc1] \n\t"
401  "subu %[t5], $zero, %[qc2] \n\t"
402  "subu %[t6], $zero, %[qc3] \n\t"
403  "subu %[t7], $zero, %[qc4] \n\t"
404  "movn %[qc1], %[t4], %[t0] \n\t"
405  "movn %[qc2], %[t5], %[t1] \n\t"
406  "movn %[qc3], %[t6], %[t2] \n\t"
407  "movn %[qc4], %[t7], %[t3] \n\t"
408 
409  ".set pop \n\t"
410 
411  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
412  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
413  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
414  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
415  : [in_int]"r"(in_int)
416  : "memory"
417  );
418 
419  curidx = 9 * qc1;
420  curidx += qc2 + 40;
421 
422  curidx2 = 9 * qc3;
423  curidx2 += qc4 + 40;
424 
425  v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
426  v_bits = p_bits[curidx] + p_bits[curidx2];
427  put_bits(pb, v_bits, v_codes);
428 
429  if (out || energy) {
430  float e1,e2,e3,e4;
431  vec1 = &p_vec[curidx*2 ];
432  vec2 = &p_vec[curidx2*2];
433  e1 = vec1[0] * IQ;
434  e2 = vec1[1] * IQ;
435  e3 = vec2[0] * IQ;
436  e4 = vec2[1] * IQ;
437  if (out) {
438  out[i+0] = e1;
439  out[i+1] = e2;
440  out[i+2] = e3;
441  out[i+3] = e4;
442  }
443  if (energy)
444  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
445  }
446  }
447  if (energy)
448  *energy = qenergy;
449 }
450 
451 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
452  PutBitContext *pb, const float *in, float *out,
453  const float *scaled, int size, int scale_idx,
454  int cb, const float lambda, const float uplim,
455  int *bits, float *energy, const float ROUNDING)
456 {
457  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
458  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
459  int i;
460  int qc1, qc2, qc3, qc4;
461  float qenergy = 0.0f;
462 
463  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
464  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
465  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
466 
467  abs_pow34_v(s->scoefs, in, size);
468  scaled = s->scoefs;
469  for (i = 0; i < size; i += 4) {
470  int curidx1, curidx2, sign1, count1, sign2, count2;
471  int *in_int = (int *)&in[i];
472  uint8_t v_bits;
473  unsigned int v_codes;
474  int t0, t1, t2, t3, t4;
475  const float *vec1, *vec2;
476 
477  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
478  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
479  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
480  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
481 
482  __asm__ volatile (
483  ".set push \n\t"
484  ".set noreorder \n\t"
485 
486  "ori %[t4], $zero, 7 \n\t"
487  "ori %[sign1], $zero, 0 \n\t"
488  "ori %[sign2], $zero, 0 \n\t"
489  "slt %[t0], %[t4], %[qc1] \n\t"
490  "slt %[t1], %[t4], %[qc2] \n\t"
491  "slt %[t2], %[t4], %[qc3] \n\t"
492  "slt %[t3], %[t4], %[qc4] \n\t"
493  "movn %[qc1], %[t4], %[t0] \n\t"
494  "movn %[qc2], %[t4], %[t1] \n\t"
495  "movn %[qc3], %[t4], %[t2] \n\t"
496  "movn %[qc4], %[t4], %[t3] \n\t"
497  "lw %[t0], 0(%[in_int]) \n\t"
498  "lw %[t1], 4(%[in_int]) \n\t"
499  "lw %[t2], 8(%[in_int]) \n\t"
500  "lw %[t3], 12(%[in_int]) \n\t"
501  "slt %[t0], %[t0], $zero \n\t"
502  "movn %[sign1], %[t0], %[qc1] \n\t"
503  "slt %[t2], %[t2], $zero \n\t"
504  "movn %[sign2], %[t2], %[qc3] \n\t"
505  "slt %[t1], %[t1], $zero \n\t"
506  "sll %[t0], %[sign1], 1 \n\t"
507  "or %[t0], %[t0], %[t1] \n\t"
508  "movn %[sign1], %[t0], %[qc2] \n\t"
509  "slt %[t3], %[t3], $zero \n\t"
510  "sll %[t0], %[sign2], 1 \n\t"
511  "or %[t0], %[t0], %[t3] \n\t"
512  "movn %[sign2], %[t0], %[qc4] \n\t"
513  "slt %[count1], $zero, %[qc1] \n\t"
514  "slt %[t1], $zero, %[qc2] \n\t"
515  "slt %[count2], $zero, %[qc3] \n\t"
516  "slt %[t2], $zero, %[qc4] \n\t"
517  "addu %[count1], %[count1], %[t1] \n\t"
518  "addu %[count2], %[count2], %[t2] \n\t"
519 
520  ".set pop \n\t"
521 
522  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
523  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
524  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
525  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
526  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
527  [t4]"=&r"(t4)
528  : [in_int]"r"(in_int)
529  : "t0", "t1", "t2", "t3", "t4",
530  "memory"
531  );
532 
533  curidx1 = 8 * qc1;
534  curidx1 += qc2;
535 
536  v_codes = (p_codes[curidx1] << count1) | sign1;
537  v_bits = p_bits[curidx1] + count1;
538  put_bits(pb, v_bits, v_codes);
539 
540  curidx2 = 8 * qc3;
541  curidx2 += qc4;
542 
543  v_codes = (p_codes[curidx2] << count2) | sign2;
544  v_bits = p_bits[curidx2] + count2;
545  put_bits(pb, v_bits, v_codes);
546 
547  if (out || energy) {
548  float e1,e2,e3,e4;
549  vec1 = &p_vec[curidx1*2];
550  vec2 = &p_vec[curidx2*2];
551  e1 = copysignf(vec1[0] * IQ, in[i+0]);
552  e2 = copysignf(vec1[1] * IQ, in[i+1]);
553  e3 = copysignf(vec2[0] * IQ, in[i+2]);
554  e4 = copysignf(vec2[1] * IQ, in[i+3]);
555  if (out) {
556  out[i+0] = e1;
557  out[i+1] = e2;
558  out[i+2] = e3;
559  out[i+3] = e4;
560  }
561  if (energy)
562  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
563  }
564  }
565  if (energy)
566  *energy = qenergy;
567 }
568 
569 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
570  PutBitContext *pb, const float *in, float *out,
571  const float *scaled, int size, int scale_idx,
572  int cb, const float lambda, const float uplim,
573  int *bits, float *energy, const float ROUNDING)
574 {
575  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
576  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
577  int i;
578  int qc1, qc2, qc3, qc4;
579  float qenergy = 0.0f;
580 
581  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
582  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
583  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
584 
585  abs_pow34_v(s->scoefs, in, size);
586  scaled = s->scoefs;
587  for (i = 0; i < size; i += 4) {
588  int curidx1, curidx2, sign1, count1, sign2, count2;
589  int *in_int = (int *)&in[i];
590  uint8_t v_bits;
591  unsigned int v_codes;
592  int t0, t1, t2, t3, t4;
593  const float *vec1, *vec2;
594 
595  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
596  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
597  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
598  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
599 
600  __asm__ volatile (
601  ".set push \n\t"
602  ".set noreorder \n\t"
603 
604  "ori %[t4], $zero, 12 \n\t"
605  "ori %[sign1], $zero, 0 \n\t"
606  "ori %[sign2], $zero, 0 \n\t"
607  "slt %[t0], %[t4], %[qc1] \n\t"
608  "slt %[t1], %[t4], %[qc2] \n\t"
609  "slt %[t2], %[t4], %[qc3] \n\t"
610  "slt %[t3], %[t4], %[qc4] \n\t"
611  "movn %[qc1], %[t4], %[t0] \n\t"
612  "movn %[qc2], %[t4], %[t1] \n\t"
613  "movn %[qc3], %[t4], %[t2] \n\t"
614  "movn %[qc4], %[t4], %[t3] \n\t"
615  "lw %[t0], 0(%[in_int]) \n\t"
616  "lw %[t1], 4(%[in_int]) \n\t"
617  "lw %[t2], 8(%[in_int]) \n\t"
618  "lw %[t3], 12(%[in_int]) \n\t"
619  "slt %[t0], %[t0], $zero \n\t"
620  "movn %[sign1], %[t0], %[qc1] \n\t"
621  "slt %[t2], %[t2], $zero \n\t"
622  "movn %[sign2], %[t2], %[qc3] \n\t"
623  "slt %[t1], %[t1], $zero \n\t"
624  "sll %[t0], %[sign1], 1 \n\t"
625  "or %[t0], %[t0], %[t1] \n\t"
626  "movn %[sign1], %[t0], %[qc2] \n\t"
627  "slt %[t3], %[t3], $zero \n\t"
628  "sll %[t0], %[sign2], 1 \n\t"
629  "or %[t0], %[t0], %[t3] \n\t"
630  "movn %[sign2], %[t0], %[qc4] \n\t"
631  "slt %[count1], $zero, %[qc1] \n\t"
632  "slt %[t1], $zero, %[qc2] \n\t"
633  "slt %[count2], $zero, %[qc3] \n\t"
634  "slt %[t2], $zero, %[qc4] \n\t"
635  "addu %[count1], %[count1], %[t1] \n\t"
636  "addu %[count2], %[count2], %[t2] \n\t"
637 
638  ".set pop \n\t"
639 
640  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
641  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
642  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
643  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
644  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
645  [t4]"=&r"(t4)
646  : [in_int]"r"(in_int)
647  : "memory"
648  );
649 
650  curidx1 = 13 * qc1;
651  curidx1 += qc2;
652 
653  v_codes = (p_codes[curidx1] << count1) | sign1;
654  v_bits = p_bits[curidx1] + count1;
655  put_bits(pb, v_bits, v_codes);
656 
657  curidx2 = 13 * qc3;
658  curidx2 += qc4;
659 
660  v_codes = (p_codes[curidx2] << count2) | sign2;
661  v_bits = p_bits[curidx2] + count2;
662  put_bits(pb, v_bits, v_codes);
663 
664  if (out || energy) {
665  float e1,e2,e3,e4;
666  vec1 = &p_vec[curidx1*2];
667  vec2 = &p_vec[curidx2*2];
668  e1 = copysignf(vec1[0] * IQ, in[i+0]);
669  e2 = copysignf(vec1[1] * IQ, in[i+1]);
670  e3 = copysignf(vec2[0] * IQ, in[i+2]);
671  e4 = copysignf(vec2[1] * IQ, in[i+3]);
672  if (out) {
673  out[i+0] = e1;
674  out[i+1] = e2;
675  out[i+2] = e3;
676  out[i+3] = e4;
677  }
678  if (energy)
679  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
680  }
681  }
682  if (energy)
683  *energy = qenergy;
684 }
685 
686 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
687  PutBitContext *pb, const float *in, float *out,
688  const float *scaled, int size, int scale_idx,
689  int cb, const float lambda, const float uplim,
690  int *bits, float *energy, const float ROUNDING)
691 {
692  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
693  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
694  int i;
695  int qc1, qc2, qc3, qc4;
696  float qenergy = 0.0f;
697 
698  uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
699  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
700  float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
701 
702  abs_pow34_v(s->scoefs, in, size);
703  scaled = s->scoefs;
704 
705  if (cb < 11) {
706  for (i = 0; i < size; i += 4) {
707  int curidx, curidx2, sign1, count1, sign2, count2;
708  int *in_int = (int *)&in[i];
709  uint8_t v_bits;
710  unsigned int v_codes;
711  int t0, t1, t2, t3, t4;
712  const float *vec1, *vec2;
713 
714  qc1 = scaled[i ] * Q34 + ROUNDING;
715  qc2 = scaled[i+1] * Q34 + ROUNDING;
716  qc3 = scaled[i+2] * Q34 + ROUNDING;
717  qc4 = scaled[i+3] * Q34 + ROUNDING;
718 
719  __asm__ volatile (
720  ".set push \n\t"
721  ".set noreorder \n\t"
722 
723  "ori %[t4], $zero, 16 \n\t"
724  "ori %[sign1], $zero, 0 \n\t"
725  "ori %[sign2], $zero, 0 \n\t"
726  "slt %[t0], %[t4], %[qc1] \n\t"
727  "slt %[t1], %[t4], %[qc2] \n\t"
728  "slt %[t2], %[t4], %[qc3] \n\t"
729  "slt %[t3], %[t4], %[qc4] \n\t"
730  "movn %[qc1], %[t4], %[t0] \n\t"
731  "movn %[qc2], %[t4], %[t1] \n\t"
732  "movn %[qc3], %[t4], %[t2] \n\t"
733  "movn %[qc4], %[t4], %[t3] \n\t"
734  "lw %[t0], 0(%[in_int]) \n\t"
735  "lw %[t1], 4(%[in_int]) \n\t"
736  "lw %[t2], 8(%[in_int]) \n\t"
737  "lw %[t3], 12(%[in_int]) \n\t"
738  "slt %[t0], %[t0], $zero \n\t"
739  "movn %[sign1], %[t0], %[qc1] \n\t"
740  "slt %[t2], %[t2], $zero \n\t"
741  "movn %[sign2], %[t2], %[qc3] \n\t"
742  "slt %[t1], %[t1], $zero \n\t"
743  "sll %[t0], %[sign1], 1 \n\t"
744  "or %[t0], %[t0], %[t1] \n\t"
745  "movn %[sign1], %[t0], %[qc2] \n\t"
746  "slt %[t3], %[t3], $zero \n\t"
747  "sll %[t0], %[sign2], 1 \n\t"
748  "or %[t0], %[t0], %[t3] \n\t"
749  "movn %[sign2], %[t0], %[qc4] \n\t"
750  "slt %[count1], $zero, %[qc1] \n\t"
751  "slt %[t1], $zero, %[qc2] \n\t"
752  "slt %[count2], $zero, %[qc3] \n\t"
753  "slt %[t2], $zero, %[qc4] \n\t"
754  "addu %[count1], %[count1], %[t1] \n\t"
755  "addu %[count2], %[count2], %[t2] \n\t"
756 
757  ".set pop \n\t"
758 
759  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
760  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
761  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
762  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
763  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
764  [t4]"=&r"(t4)
765  : [in_int]"r"(in_int)
766  : "memory"
767  );
768 
769  curidx = 17 * qc1;
770  curidx += qc2;
771  curidx2 = 17 * qc3;
772  curidx2 += qc4;
773 
774  v_codes = (p_codes[curidx] << count1) | sign1;
775  v_bits = p_bits[curidx] + count1;
776  put_bits(pb, v_bits, v_codes);
777 
778  v_codes = (p_codes[curidx2] << count2) | sign2;
779  v_bits = p_bits[curidx2] + count2;
780  put_bits(pb, v_bits, v_codes);
781 
782  if (out || energy) {
783  float e1,e2,e3,e4;
784  vec1 = &p_vectors[curidx*2 ];
785  vec2 = &p_vectors[curidx2*2];
786  e1 = copysignf(vec1[0] * IQ, in[i+0]);
787  e2 = copysignf(vec1[1] * IQ, in[i+1]);
788  e3 = copysignf(vec2[0] * IQ, in[i+2]);
789  e4 = copysignf(vec2[1] * IQ, in[i+3]);
790  if (out) {
791  out[i+0] = e1;
792  out[i+1] = e2;
793  out[i+2] = e3;
794  out[i+3] = e4;
795  }
796  if (energy)
797  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
798  }
799  }
800  } else {
801  for (i = 0; i < size; i += 4) {
802  int curidx, curidx2, sign1, count1, sign2, count2;
803  int *in_int = (int *)&in[i];
804  uint8_t v_bits;
805  unsigned int v_codes;
806  int c1, c2, c3, c4;
807  int t0, t1, t2, t3, t4;
808 
809  qc1 = scaled[i ] * Q34 + ROUNDING;
810  qc2 = scaled[i+1] * Q34 + ROUNDING;
811  qc3 = scaled[i+2] * Q34 + ROUNDING;
812  qc4 = scaled[i+3] * Q34 + ROUNDING;
813 
814  __asm__ volatile (
815  ".set push \n\t"
816  ".set noreorder \n\t"
817 
818  "ori %[t4], $zero, 16 \n\t"
819  "ori %[sign1], $zero, 0 \n\t"
820  "ori %[sign2], $zero, 0 \n\t"
821  "shll_s.w %[c1], %[qc1], 18 \n\t"
822  "shll_s.w %[c2], %[qc2], 18 \n\t"
823  "shll_s.w %[c3], %[qc3], 18 \n\t"
824  "shll_s.w %[c4], %[qc4], 18 \n\t"
825  "srl %[c1], %[c1], 18 \n\t"
826  "srl %[c2], %[c2], 18 \n\t"
827  "srl %[c3], %[c3], 18 \n\t"
828  "srl %[c4], %[c4], 18 \n\t"
829  "slt %[t0], %[t4], %[qc1] \n\t"
830  "slt %[t1], %[t4], %[qc2] \n\t"
831  "slt %[t2], %[t4], %[qc3] \n\t"
832  "slt %[t3], %[t4], %[qc4] \n\t"
833  "movn %[qc1], %[t4], %[t0] \n\t"
834  "movn %[qc2], %[t4], %[t1] \n\t"
835  "movn %[qc3], %[t4], %[t2] \n\t"
836  "movn %[qc4], %[t4], %[t3] \n\t"
837  "lw %[t0], 0(%[in_int]) \n\t"
838  "lw %[t1], 4(%[in_int]) \n\t"
839  "lw %[t2], 8(%[in_int]) \n\t"
840  "lw %[t3], 12(%[in_int]) \n\t"
841  "slt %[t0], %[t0], $zero \n\t"
842  "movn %[sign1], %[t0], %[qc1] \n\t"
843  "slt %[t2], %[t2], $zero \n\t"
844  "movn %[sign2], %[t2], %[qc3] \n\t"
845  "slt %[t1], %[t1], $zero \n\t"
846  "sll %[t0], %[sign1], 1 \n\t"
847  "or %[t0], %[t0], %[t1] \n\t"
848  "movn %[sign1], %[t0], %[qc2] \n\t"
849  "slt %[t3], %[t3], $zero \n\t"
850  "sll %[t0], %[sign2], 1 \n\t"
851  "or %[t0], %[t0], %[t3] \n\t"
852  "movn %[sign2], %[t0], %[qc4] \n\t"
853  "slt %[count1], $zero, %[qc1] \n\t"
854  "slt %[t1], $zero, %[qc2] \n\t"
855  "slt %[count2], $zero, %[qc3] \n\t"
856  "slt %[t2], $zero, %[qc4] \n\t"
857  "addu %[count1], %[count1], %[t1] \n\t"
858  "addu %[count2], %[count2], %[t2] \n\t"
859 
860  ".set pop \n\t"
861 
862  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
863  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
864  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
865  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
866  [c1]"=&r"(c1), [c2]"=&r"(c2),
867  [c3]"=&r"(c3), [c4]"=&r"(c4),
868  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
869  [t4]"=&r"(t4)
870  : [in_int]"r"(in_int)
871  : "memory"
872  );
873 
874  curidx = 17 * qc1;
875  curidx += qc2;
876 
877  curidx2 = 17 * qc3;
878  curidx2 += qc4;
879 
880  v_codes = (p_codes[curidx] << count1) | sign1;
881  v_bits = p_bits[curidx] + count1;
882  put_bits(pb, v_bits, v_codes);
883 
884  if (p_vectors[curidx*2 ] == 64.0f) {
885  int len = av_log2(c1);
886  v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
887  put_bits(pb, len * 2 - 3, v_codes);
888  }
889  if (p_vectors[curidx*2+1] == 64.0f) {
890  int len = av_log2(c2);
891  v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
892  put_bits(pb, len*2-3, v_codes);
893  }
894 
895  v_codes = (p_codes[curidx2] << count2) | sign2;
896  v_bits = p_bits[curidx2] + count2;
897  put_bits(pb, v_bits, v_codes);
898 
899  if (p_vectors[curidx2*2 ] == 64.0f) {
900  int len = av_log2(c3);
901  v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
902  put_bits(pb, len* 2 - 3, v_codes);
903  }
904  if (p_vectors[curidx2*2+1] == 64.0f) {
905  int len = av_log2(c4);
906  v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
907  put_bits(pb, len * 2 - 3, v_codes);
908  }
909 
910  if (out || energy) {
911  float e1, e2, e3, e4;
912  e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
913  e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
914  e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
915  e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
916  if (out) {
917  out[i+0] = e1;
918  out[i+1] = e2;
919  out[i+2] = e3;
920  out[i+3] = e4;
921  }
922  if (energy)
923  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
924  }
925  }
926  }
927  if (energy)
928  *energy = qenergy;
929 }
930 
931 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
932  PutBitContext *pb, const float *in, float *out,
933  const float *scaled, int size, int scale_idx,
934  int cb, const float lambda, const float uplim,
935  int *bits, float *energy, const float ROUNDING) {
936  av_assert0(0);
937 }
938 
939 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
940  PutBitContext *pb, const float *in, float *out,
941  const float *scaled, int size, int scale_idx,
942  int cb, const float lambda, const float uplim,
943  int *bits, float *energy, const float ROUNDING) {
944  int i;
945  if (bits)
946  *bits = 0;
947  if (out) {
948  for (i = 0; i < size; i += 4) {
949  out[i ] = 0.0f;
950  out[i+1] = 0.0f;
951  out[i+2] = 0.0f;
952  out[i+3] = 0.0f;
953  }
954  }
955  if (energy)
956  *energy = 0.0f;
957 }
958 
959 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
960  PutBitContext *pb, const float *in, float *out,
961  const float *scaled, int size, int scale_idx,
962  int cb, const float lambda, const float uplim,
963  int *bits, float *energy, const float ROUNDING) = {
964  quantize_and_encode_band_cost_ZERO_mips,
965  quantize_and_encode_band_cost_SQUAD_mips,
966  quantize_and_encode_band_cost_SQUAD_mips,
967  quantize_and_encode_band_cost_UQUAD_mips,
968  quantize_and_encode_band_cost_UQUAD_mips,
969  quantize_and_encode_band_cost_SPAIR_mips,
970  quantize_and_encode_band_cost_SPAIR_mips,
971  quantize_and_encode_band_cost_UPAIR7_mips,
972  quantize_and_encode_band_cost_UPAIR7_mips,
973  quantize_and_encode_band_cost_UPAIR12_mips,
974  quantize_and_encode_band_cost_UPAIR12_mips,
975  quantize_and_encode_band_cost_ESC_mips,
976  quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
977  quantize_and_encode_band_cost_ZERO_mips,
978  quantize_and_encode_band_cost_ZERO_mips,
979  quantize_and_encode_band_cost_ZERO_mips,
980 };
981 
/*
 * Call the quantize/encode routine registered for codebook cb.
 * Note: cb is expanded twice (table index and argument), so it must be
 * free of side effects; no range check is performed on it.
 */
#define quantize_and_encode_band_cost(s, pb, in, out, scaled, size,       \
                                      scale_idx, cb, lambda, uplim,       \
                                      bits, energy, ROUNDING)             \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, out, scaled, size,   \
                                          scale_idx, cb, lambda, uplim,   \
                                          bits, energy, ROUNDING)
988 
989 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
990  const float *in, float *out, int size, int scale_idx,
991  int cb, const float lambda, int rtz)
992 {
993  quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
994  INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
995 }
996 
997 /**
998  * Per-codebook functions derived from the generic template function and specialized to compute only the number of bits needed to code a band.
999  */
1000 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1001  PutBitContext *pb, const float *in,
1002  const float *scaled, int size, int scale_idx,
1003  int cb, const float lambda, const float uplim,
1004  int *bits)
1005 {
1006  return 0;
1007 }
1008 
1009 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
1010  PutBitContext *pb, const float *in,
1011  const float *scaled, int size, int scale_idx,
1012  int cb, const float lambda, const float uplim,
1013  int *bits)
1014 {
1015  av_assert0(0);
1016  return 0;
1017 }
1018 
1019 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1020  PutBitContext *pb, const float *in,
1021  const float *scaled, int size, int scale_idx,
1022  int cb, const float lambda, const float uplim,
1023  int *bits)
1024 {
1025  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1026  int i;
1027  int qc1, qc2, qc3, qc4;
1028  int curbits = 0;
1029 
1030  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1031 
1032  for (i = 0; i < size; i += 4) {
1033  int curidx;
1034  int *in_int = (int *)&in[i];
1035  int t0, t1, t2, t3, t4, t5, t6, t7;
1036 
1037  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1038  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1039  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1040  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1041 
1042  __asm__ volatile (
1043  ".set push \n\t"
1044  ".set noreorder \n\t"
1045 
1046  "slt %[qc1], $zero, %[qc1] \n\t"
1047  "slt %[qc2], $zero, %[qc2] \n\t"
1048  "slt %[qc3], $zero, %[qc3] \n\t"
1049  "slt %[qc4], $zero, %[qc4] \n\t"
1050  "lw %[t0], 0(%[in_int]) \n\t"
1051  "lw %[t1], 4(%[in_int]) \n\t"
1052  "lw %[t2], 8(%[in_int]) \n\t"
1053  "lw %[t3], 12(%[in_int]) \n\t"
1054  "srl %[t0], %[t0], 31 \n\t"
1055  "srl %[t1], %[t1], 31 \n\t"
1056  "srl %[t2], %[t2], 31 \n\t"
1057  "srl %[t3], %[t3], 31 \n\t"
1058  "subu %[t4], $zero, %[qc1] \n\t"
1059  "subu %[t5], $zero, %[qc2] \n\t"
1060  "subu %[t6], $zero, %[qc3] \n\t"
1061  "subu %[t7], $zero, %[qc4] \n\t"
1062  "movn %[qc1], %[t4], %[t0] \n\t"
1063  "movn %[qc2], %[t5], %[t1] \n\t"
1064  "movn %[qc3], %[t6], %[t2] \n\t"
1065  "movn %[qc4], %[t7], %[t3] \n\t"
1066 
1067  ".set pop \n\t"
1068 
1069  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1070  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1071  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1072  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1073  : [in_int]"r"(in_int)
1074  : "memory"
1075  );
1076 
1077  curidx = qc1;
1078  curidx *= 3;
1079  curidx += qc2;
1080  curidx *= 3;
1081  curidx += qc3;
1082  curidx *= 3;
1083  curidx += qc4;
1084  curidx += 40;
1085 
1086  curbits += p_bits[curidx];
1087  }
1088  return curbits;
1089 }
1090 
1091 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1092  PutBitContext *pb, const float *in,
1093  const float *scaled, int size, int scale_idx,
1094  int cb, const float lambda, const float uplim,
1095  int *bits)
1096 {
1097  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1098  int i;
1099  int curbits = 0;
1100  int qc1, qc2, qc3, qc4;
1101 
1102  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1103 
1104  for (i = 0; i < size; i += 4) {
1105  int curidx;
1106  int t0, t1, t2, t3, t4;
1107 
1108  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1109  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1110  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1111  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1112 
1113  __asm__ volatile (
1114  ".set push \n\t"
1115  ".set noreorder \n\t"
1116 
1117  "ori %[t4], $zero, 2 \n\t"
1118  "slt %[t0], %[t4], %[qc1] \n\t"
1119  "slt %[t1], %[t4], %[qc2] \n\t"
1120  "slt %[t2], %[t4], %[qc3] \n\t"
1121  "slt %[t3], %[t4], %[qc4] \n\t"
1122  "movn %[qc1], %[t4], %[t0] \n\t"
1123  "movn %[qc2], %[t4], %[t1] \n\t"
1124  "movn %[qc3], %[t4], %[t2] \n\t"
1125  "movn %[qc4], %[t4], %[t3] \n\t"
1126 
1127  ".set pop \n\t"
1128 
1129  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1130  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1131  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1132  [t4]"=&r"(t4)
1133  );
1134 
1135  curidx = qc1;
1136  curidx *= 3;
1137  curidx += qc2;
1138  curidx *= 3;
1139  curidx += qc3;
1140  curidx *= 3;
1141  curidx += qc4;
1142 
1143  curbits += p_bits[curidx];
1144  curbits += uquad_sign_bits[curidx];
1145  }
1146  return curbits;
1147 }
1148 
1149 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1150  PutBitContext *pb, const float *in,
1151  const float *scaled, int size, int scale_idx,
1152  int cb, const float lambda, const float uplim,
1153  int *bits)
1154 {
1155  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1156  int i;
1157  int qc1, qc2, qc3, qc4;
1158  int curbits = 0;
1159 
1160  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1161 
1162  for (i = 0; i < size; i += 4) {
1163  int curidx, curidx2;
1164  int *in_int = (int *)&in[i];
1165  int t0, t1, t2, t3, t4, t5, t6, t7;
1166 
1167  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1168  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1169  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1170  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1171 
1172  __asm__ volatile (
1173  ".set push \n\t"
1174  ".set noreorder \n\t"
1175 
1176  "ori %[t4], $zero, 4 \n\t"
1177  "slt %[t0], %[t4], %[qc1] \n\t"
1178  "slt %[t1], %[t4], %[qc2] \n\t"
1179  "slt %[t2], %[t4], %[qc3] \n\t"
1180  "slt %[t3], %[t4], %[qc4] \n\t"
1181  "movn %[qc1], %[t4], %[t0] \n\t"
1182  "movn %[qc2], %[t4], %[t1] \n\t"
1183  "movn %[qc3], %[t4], %[t2] \n\t"
1184  "movn %[qc4], %[t4], %[t3] \n\t"
1185  "lw %[t0], 0(%[in_int]) \n\t"
1186  "lw %[t1], 4(%[in_int]) \n\t"
1187  "lw %[t2], 8(%[in_int]) \n\t"
1188  "lw %[t3], 12(%[in_int]) \n\t"
1189  "srl %[t0], %[t0], 31 \n\t"
1190  "srl %[t1], %[t1], 31 \n\t"
1191  "srl %[t2], %[t2], 31 \n\t"
1192  "srl %[t3], %[t3], 31 \n\t"
1193  "subu %[t4], $zero, %[qc1] \n\t"
1194  "subu %[t5], $zero, %[qc2] \n\t"
1195  "subu %[t6], $zero, %[qc3] \n\t"
1196  "subu %[t7], $zero, %[qc4] \n\t"
1197  "movn %[qc1], %[t4], %[t0] \n\t"
1198  "movn %[qc2], %[t5], %[t1] \n\t"
1199  "movn %[qc3], %[t6], %[t2] \n\t"
1200  "movn %[qc4], %[t7], %[t3] \n\t"
1201 
1202  ".set pop \n\t"
1203 
1204  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1205  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1206  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1207  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1208  : [in_int]"r"(in_int)
1209  : "memory"
1210  );
1211 
1212  curidx = 9 * qc1;
1213  curidx += qc2 + 40;
1214 
1215  curidx2 = 9 * qc3;
1216  curidx2 += qc4 + 40;
1217 
1218  curbits += p_bits[curidx] + p_bits[curidx2];
1219  }
1220  return curbits;
1221 }
1222 
1223 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1224  PutBitContext *pb, const float *in,
1225  const float *scaled, int size, int scale_idx,
1226  int cb, const float lambda, const float uplim,
1227  int *bits)
1228 {
1229  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1230  int i;
1231  int qc1, qc2, qc3, qc4;
1232  int curbits = 0;
1233 
1234  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1235 
1236  for (i = 0; i < size; i += 4) {
1237  int curidx, curidx2;
1238  int t0, t1, t2, t3, t4;
1239 
1240  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1241  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1242  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1243  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1244 
1245  __asm__ volatile (
1246  ".set push \n\t"
1247  ".set noreorder \n\t"
1248 
1249  "ori %[t4], $zero, 7 \n\t"
1250  "slt %[t0], %[t4], %[qc1] \n\t"
1251  "slt %[t1], %[t4], %[qc2] \n\t"
1252  "slt %[t2], %[t4], %[qc3] \n\t"
1253  "slt %[t3], %[t4], %[qc4] \n\t"
1254  "movn %[qc1], %[t4], %[t0] \n\t"
1255  "movn %[qc2], %[t4], %[t1] \n\t"
1256  "movn %[qc3], %[t4], %[t2] \n\t"
1257  "movn %[qc4], %[t4], %[t3] \n\t"
1258 
1259  ".set pop \n\t"
1260 
1261  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1262  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1263  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1264  [t4]"=&r"(t4)
1265  );
1266 
1267  curidx = 8 * qc1;
1268  curidx += qc2;
1269 
1270  curidx2 = 8 * qc3;
1271  curidx2 += qc4;
1272 
1273  curbits += p_bits[curidx] +
1274  upair7_sign_bits[curidx] +
1275  p_bits[curidx2] +
1276  upair7_sign_bits[curidx2];
1277  }
1278  return curbits;
1279 }
1280 
1281 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1282  PutBitContext *pb, const float *in,
1283  const float *scaled, int size, int scale_idx,
1284  int cb, const float lambda, const float uplim,
1285  int *bits)
1286 {
1287  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1288  int i;
1289  int qc1, qc2, qc3, qc4;
1290  int curbits = 0;
1291 
1292  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1293 
1294  for (i = 0; i < size; i += 4) {
1295  int curidx, curidx2;
1296  int t0, t1, t2, t3, t4;
1297 
1298  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1299  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1300  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1301  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1302 
1303  __asm__ volatile (
1304  ".set push \n\t"
1305  ".set noreorder \n\t"
1306 
1307  "ori %[t4], $zero, 12 \n\t"
1308  "slt %[t0], %[t4], %[qc1] \n\t"
1309  "slt %[t1], %[t4], %[qc2] \n\t"
1310  "slt %[t2], %[t4], %[qc3] \n\t"
1311  "slt %[t3], %[t4], %[qc4] \n\t"
1312  "movn %[qc1], %[t4], %[t0] \n\t"
1313  "movn %[qc2], %[t4], %[t1] \n\t"
1314  "movn %[qc3], %[t4], %[t2] \n\t"
1315  "movn %[qc4], %[t4], %[t3] \n\t"
1316 
1317  ".set pop \n\t"
1318 
1319  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1320  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1321  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1322  [t4]"=&r"(t4)
1323  );
1324 
1325  curidx = 13 * qc1;
1326  curidx += qc2;
1327 
1328  curidx2 = 13 * qc3;
1329  curidx2 += qc4;
1330 
1331  curbits += p_bits[curidx] +
1332  p_bits[curidx2] +
1333  upair12_sign_bits[curidx] +
1334  upair12_sign_bits[curidx2];
1335  }
1336  return curbits;
1337 }
1338 
1339 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1340  PutBitContext *pb, const float *in,
1341  const float *scaled, int size, int scale_idx,
1342  int cb, const float lambda, const float uplim,
1343  int *bits)
1344 {
1345  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1346  int i;
1347  int qc1, qc2, qc3, qc4;
1348  int curbits = 0;
1349 
1350  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1351 
1352  for (i = 0; i < size; i += 4) {
1353  int curidx, curidx2;
1354  int cond0, cond1, cond2, cond3;
1355  int c1, c2, c3, c4;
1356  int t4, t5;
1357 
1358  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1359  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1360  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1361  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1362 
1363  __asm__ volatile (
1364  ".set push \n\t"
1365  ".set noreorder \n\t"
1366 
1367  "ori %[t4], $zero, 15 \n\t"
1368  "ori %[t5], $zero, 16 \n\t"
1369  "shll_s.w %[c1], %[qc1], 18 \n\t"
1370  "shll_s.w %[c2], %[qc2], 18 \n\t"
1371  "shll_s.w %[c3], %[qc3], 18 \n\t"
1372  "shll_s.w %[c4], %[qc4], 18 \n\t"
1373  "srl %[c1], %[c1], 18 \n\t"
1374  "srl %[c2], %[c2], 18 \n\t"
1375  "srl %[c3], %[c3], 18 \n\t"
1376  "srl %[c4], %[c4], 18 \n\t"
1377  "slt %[cond0], %[t4], %[qc1] \n\t"
1378  "slt %[cond1], %[t4], %[qc2] \n\t"
1379  "slt %[cond2], %[t4], %[qc3] \n\t"
1380  "slt %[cond3], %[t4], %[qc4] \n\t"
1381  "movn %[qc1], %[t5], %[cond0] \n\t"
1382  "movn %[qc2], %[t5], %[cond1] \n\t"
1383  "movn %[qc3], %[t5], %[cond2] \n\t"
1384  "movn %[qc4], %[t5], %[cond3] \n\t"
1385  "ori %[t5], $zero, 31 \n\t"
1386  "clz %[c1], %[c1] \n\t"
1387  "clz %[c2], %[c2] \n\t"
1388  "clz %[c3], %[c3] \n\t"
1389  "clz %[c4], %[c4] \n\t"
1390  "subu %[c1], %[t5], %[c1] \n\t"
1391  "subu %[c2], %[t5], %[c2] \n\t"
1392  "subu %[c3], %[t5], %[c3] \n\t"
1393  "subu %[c4], %[t5], %[c4] \n\t"
1394  "sll %[c1], %[c1], 1 \n\t"
1395  "sll %[c2], %[c2], 1 \n\t"
1396  "sll %[c3], %[c3], 1 \n\t"
1397  "sll %[c4], %[c4], 1 \n\t"
1398  "addiu %[c1], %[c1], -3 \n\t"
1399  "addiu %[c2], %[c2], -3 \n\t"
1400  "addiu %[c3], %[c3], -3 \n\t"
1401  "addiu %[c4], %[c4], -3 \n\t"
1402  "subu %[cond0], $zero, %[cond0] \n\t"
1403  "subu %[cond1], $zero, %[cond1] \n\t"
1404  "subu %[cond2], $zero, %[cond2] \n\t"
1405  "subu %[cond3], $zero, %[cond3] \n\t"
1406  "and %[c1], %[c1], %[cond0] \n\t"
1407  "and %[c2], %[c2], %[cond1] \n\t"
1408  "and %[c3], %[c3], %[cond2] \n\t"
1409  "and %[c4], %[c4], %[cond3] \n\t"
1410 
1411  ".set pop \n\t"
1412 
1413  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1414  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1415  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1416  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1417  [c1]"=&r"(c1), [c2]"=&r"(c2),
1418  [c3]"=&r"(c3), [c4]"=&r"(c4),
1419  [t4]"=&r"(t4), [t5]"=&r"(t5)
1420  );
1421 
1422  curidx = 17 * qc1;
1423  curidx += qc2;
1424 
1425  curidx2 = 17 * qc3;
1426  curidx2 += qc4;
1427 
1428  curbits += p_bits[curidx];
1429  curbits += esc_sign_bits[curidx];
1430  curbits += p_bits[curidx2];
1431  curbits += esc_sign_bits[curidx2];
1432 
1433  curbits += c1;
1434  curbits += c2;
1435  curbits += c3;
1436  curbits += c4;
1437  }
1438  return curbits;
1439 }
1440 
1441 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1442  PutBitContext *pb, const float *in,
1443  const float *scaled, int size, int scale_idx,
1444  int cb, const float lambda, const float uplim,
1445  int *bits) = {
1446  get_band_numbits_ZERO_mips,
1447  get_band_numbits_SQUAD_mips,
1448  get_band_numbits_SQUAD_mips,
1449  get_band_numbits_UQUAD_mips,
1450  get_band_numbits_UQUAD_mips,
1451  get_band_numbits_SPAIR_mips,
1452  get_band_numbits_SPAIR_mips,
1453  get_band_numbits_UPAIR7_mips,
1454  get_band_numbits_UPAIR7_mips,
1455  get_band_numbits_UPAIR12_mips,
1456  get_band_numbits_UPAIR12_mips,
1457  get_band_numbits_ESC_mips,
1458  get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1459  get_band_numbits_ZERO_mips,
1460  get_band_numbits_ZERO_mips,
1461  get_band_numbits_ZERO_mips,
1462 };
1463 
/*
 * Call the bit-count routine registered for codebook cb.
 * Note: cb is expanded twice (table index and argument), so it must be
 * free of side effects; no range check is performed on it.
 */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, cb,          \
                         lambda, uplim, bits)                             \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, cb,      \
                             lambda, uplim, bits)
1470 
/**
 * Return the number of bits needed to code a band with codebook cb.
 *
 * Dispatches to the codebook-specific bit-count routine; nothing is
 * written to a bitstream (a NULL PutBitContext is passed).
 *
 * @param in        original coefficients
 * @param scaled    scaled coefficients that get quantized
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index
 * @param cb        codebook number, used as the dispatch-table index
 * @param bits      if non-NULL, receives the bit count as an integer. The
 *                  per-codebook routines never store through this pointer,
 *                  so it is filled in here.
 * @param energy    not used; never written by this function
 * @param rtz       not used; the per-codebook routines always round with
 *                  ROUND_STANDARD
 * @return the bit count
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy, int rtz)
{
    const float numbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                           cb, lambda, uplim, bits);

    /* Honour the out-parameter instead of silently ignoring it. */
    if (bits)
        *bits = (int)numbits;

    return numbits;
}
1478 
1479 /**
1480  * Per-codebook functions derived from the generic template function and specialized to compute the cost of coding a band.
1481  */
1482 #if HAVE_MIPSFPU
1483 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1484  PutBitContext *pb, const float *in,
1485  const float *scaled, int size, int scale_idx,
1486  int cb, const float lambda, const float uplim,
1487  int *bits, float *energy)
1488 {
1489  int i;
1490  float cost = 0;
1491 
1492  for (i = 0; i < size; i += 4) {
1493  cost += in[i ] * in[i ];
1494  cost += in[i+1] * in[i+1];
1495  cost += in[i+2] * in[i+2];
1496  cost += in[i+3] * in[i+3];
1497  }
1498  if (bits)
1499  *bits = 0;
1500  if (energy)
1501  *energy = 0.0f;
1502  return cost * lambda;
1503 }
1504 
1505 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1506  PutBitContext *pb, const float *in,
1507  const float *scaled, int size, int scale_idx,
1508  int cb, const float lambda, const float uplim,
1509  int *bits, float *energy)
1510 {
1511  av_assert0(0);
1512  return 0;
1513 }
1514 
1515 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1516  PutBitContext *pb, const float *in,
1517  const float *scaled, int size, int scale_idx,
1518  int cb, const float lambda, const float uplim,
1519  int *bits, float *energy)
1520 {
1521  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1522  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1523  int i;
1524  float cost = 0;
1525  float qenergy = 0.0f;
1526  int qc1, qc2, qc3, qc4;
1527  int curbits = 0;
1528 
1529  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1530  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1531 
1532  for (i = 0; i < size; i += 4) {
1533  const float *vec;
1534  int curidx;
1535  int *in_int = (int *)&in[i];
1536  float *in_pos = (float *)&in[i];
1537  float di0, di1, di2, di3;
1538  int t0, t1, t2, t3, t4, t5, t6, t7;
1539 
1540  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1541  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1542  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1543  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1544 
1545  __asm__ volatile (
1546  ".set push \n\t"
1547  ".set noreorder \n\t"
1548 
1549  "slt %[qc1], $zero, %[qc1] \n\t"
1550  "slt %[qc2], $zero, %[qc2] \n\t"
1551  "slt %[qc3], $zero, %[qc3] \n\t"
1552  "slt %[qc4], $zero, %[qc4] \n\t"
1553  "lw %[t0], 0(%[in_int]) \n\t"
1554  "lw %[t1], 4(%[in_int]) \n\t"
1555  "lw %[t2], 8(%[in_int]) \n\t"
1556  "lw %[t3], 12(%[in_int]) \n\t"
1557  "srl %[t0], %[t0], 31 \n\t"
1558  "srl %[t1], %[t1], 31 \n\t"
1559  "srl %[t2], %[t2], 31 \n\t"
1560  "srl %[t3], %[t3], 31 \n\t"
1561  "subu %[t4], $zero, %[qc1] \n\t"
1562  "subu %[t5], $zero, %[qc2] \n\t"
1563  "subu %[t6], $zero, %[qc3] \n\t"
1564  "subu %[t7], $zero, %[qc4] \n\t"
1565  "movn %[qc1], %[t4], %[t0] \n\t"
1566  "movn %[qc2], %[t5], %[t1] \n\t"
1567  "movn %[qc3], %[t6], %[t2] \n\t"
1568  "movn %[qc4], %[t7], %[t3] \n\t"
1569 
1570  ".set pop \n\t"
1571 
1572  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1573  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1574  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1575  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1576  : [in_int]"r"(in_int)
1577  : "memory"
1578  );
1579 
1580  curidx = qc1;
1581  curidx *= 3;
1582  curidx += qc2;
1583  curidx *= 3;
1584  curidx += qc3;
1585  curidx *= 3;
1586  curidx += qc4;
1587  curidx += 40;
1588 
1589  curbits += p_bits[curidx];
1590  vec = &p_codes[curidx*4];
1591 
1592  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1593  + vec[2]*vec[2] + vec[3]*vec[3];
1594 
1595  __asm__ volatile (
1596  ".set push \n\t"
1597  ".set noreorder \n\t"
1598 
1599  "lwc1 $f0, 0(%[in_pos]) \n\t"
1600  "lwc1 $f1, 0(%[vec]) \n\t"
1601  "lwc1 $f2, 4(%[in_pos]) \n\t"
1602  "lwc1 $f3, 4(%[vec]) \n\t"
1603  "lwc1 $f4, 8(%[in_pos]) \n\t"
1604  "lwc1 $f5, 8(%[vec]) \n\t"
1605  "lwc1 $f6, 12(%[in_pos]) \n\t"
1606  "lwc1 $f7, 12(%[vec]) \n\t"
1607  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1608  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1609  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1610  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1611 
1612  ".set pop \n\t"
1613 
1614  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1615  [di2]"=&f"(di2), [di3]"=&f"(di3)
1616  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1617  [IQ]"f"(IQ)
1618  : "$f0", "$f1", "$f2", "$f3",
1619  "$f4", "$f5", "$f6", "$f7",
1620  "memory"
1621  );
1622 
1623  cost += di0 * di0 + di1 * di1
1624  + di2 * di2 + di3 * di3;
1625  }
1626 
1627  if (bits)
1628  *bits = curbits;
1629  if (energy)
1630  *energy = qenergy * (IQ*IQ);
1631  return cost * lambda + curbits;
1632 }
1633 
1634 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1635  PutBitContext *pb, const float *in,
1636  const float *scaled, int size, int scale_idx,
1637  int cb, const float lambda, const float uplim,
1638  int *bits, float *energy)
1639 {
1640  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1641  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1642  int i;
1643  float cost = 0;
1644  float qenergy = 0.0f;
1645  int curbits = 0;
1646  int qc1, qc2, qc3, qc4;
1647 
1648  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1649  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1650 
1651  for (i = 0; i < size; i += 4) {
1652  const float *vec;
1653  int curidx;
1654  float *in_pos = (float *)&in[i];
1655  float di0, di1, di2, di3;
1656  int t0, t1, t2, t3, t4;
1657 
1658  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1659  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1660  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1661  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1662 
1663  __asm__ volatile (
1664  ".set push \n\t"
1665  ".set noreorder \n\t"
1666 
1667  "ori %[t4], $zero, 2 \n\t"
1668  "slt %[t0], %[t4], %[qc1] \n\t"
1669  "slt %[t1], %[t4], %[qc2] \n\t"
1670  "slt %[t2], %[t4], %[qc3] \n\t"
1671  "slt %[t3], %[t4], %[qc4] \n\t"
1672  "movn %[qc1], %[t4], %[t0] \n\t"
1673  "movn %[qc2], %[t4], %[t1] \n\t"
1674  "movn %[qc3], %[t4], %[t2] \n\t"
1675  "movn %[qc4], %[t4], %[t3] \n\t"
1676 
1677  ".set pop \n\t"
1678 
1679  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1680  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1681  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1682  [t4]"=&r"(t4)
1683  );
1684 
1685  curidx = qc1;
1686  curidx *= 3;
1687  curidx += qc2;
1688  curidx *= 3;
1689  curidx += qc3;
1690  curidx *= 3;
1691  curidx += qc4;
1692 
1693  curbits += p_bits[curidx];
1694  curbits += uquad_sign_bits[curidx];
1695  vec = &p_codes[curidx*4];
1696 
1697  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1698  + vec[2]*vec[2] + vec[3]*vec[3];
1699 
1700  __asm__ volatile (
1701  ".set push \n\t"
1702  ".set noreorder \n\t"
1703 
1704  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1705  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1706  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1707  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1708  "abs.s %[di0], %[di0] \n\t"
1709  "abs.s %[di1], %[di1] \n\t"
1710  "abs.s %[di2], %[di2] \n\t"
1711  "abs.s %[di3], %[di3] \n\t"
1712  "lwc1 $f0, 0(%[vec]) \n\t"
1713  "lwc1 $f1, 4(%[vec]) \n\t"
1714  "lwc1 $f2, 8(%[vec]) \n\t"
1715  "lwc1 $f3, 12(%[vec]) \n\t"
1716  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1717  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1718  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1719  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1720 
1721  ".set pop \n\t"
1722 
1723  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1724  [di2]"=&f"(di2), [di3]"=&f"(di3)
1725  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1726  [IQ]"f"(IQ)
1727  : "$f0", "$f1", "$f2", "$f3",
1728  "memory"
1729  );
1730 
1731  cost += di0 * di0 + di1 * di1
1732  + di2 * di2 + di3 * di3;
1733  }
1734 
1735  if (bits)
1736  *bits = curbits;
1737  if (energy)
1738  *energy = qenergy * (IQ*IQ);
1739  return cost * lambda + curbits;
1740 }
1741 
1742 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1743  PutBitContext *pb, const float *in,
1744  const float *scaled, int size, int scale_idx,
1745  int cb, const float lambda, const float uplim,
1746  int *bits, float *energy)
1747 {
1748  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1749  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1750  int i;
1751  float cost = 0;
1752  float qenergy = 0.0f;
1753  int qc1, qc2, qc3, qc4;
1754  int curbits = 0;
1755 
1756  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1757  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1758 
1759  for (i = 0; i < size; i += 4) {
1760  const float *vec, *vec2;
1761  int curidx, curidx2;
1762  int *in_int = (int *)&in[i];
1763  float *in_pos = (float *)&in[i];
1764  float di0, di1, di2, di3;
1765  int t0, t1, t2, t3, t4, t5, t6, t7;
1766 
1767  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1768  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1769  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1770  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1771 
1772  __asm__ volatile (
1773  ".set push \n\t"
1774  ".set noreorder \n\t"
1775 
1776  "ori %[t4], $zero, 4 \n\t"
1777  "slt %[t0], %[t4], %[qc1] \n\t"
1778  "slt %[t1], %[t4], %[qc2] \n\t"
1779  "slt %[t2], %[t4], %[qc3] \n\t"
1780  "slt %[t3], %[t4], %[qc4] \n\t"
1781  "movn %[qc1], %[t4], %[t0] \n\t"
1782  "movn %[qc2], %[t4], %[t1] \n\t"
1783  "movn %[qc3], %[t4], %[t2] \n\t"
1784  "movn %[qc4], %[t4], %[t3] \n\t"
1785  "lw %[t0], 0(%[in_int]) \n\t"
1786  "lw %[t1], 4(%[in_int]) \n\t"
1787  "lw %[t2], 8(%[in_int]) \n\t"
1788  "lw %[t3], 12(%[in_int]) \n\t"
1789  "srl %[t0], %[t0], 31 \n\t"
1790  "srl %[t1], %[t1], 31 \n\t"
1791  "srl %[t2], %[t2], 31 \n\t"
1792  "srl %[t3], %[t3], 31 \n\t"
1793  "subu %[t4], $zero, %[qc1] \n\t"
1794  "subu %[t5], $zero, %[qc2] \n\t"
1795  "subu %[t6], $zero, %[qc3] \n\t"
1796  "subu %[t7], $zero, %[qc4] \n\t"
1797  "movn %[qc1], %[t4], %[t0] \n\t"
1798  "movn %[qc2], %[t5], %[t1] \n\t"
1799  "movn %[qc3], %[t6], %[t2] \n\t"
1800  "movn %[qc4], %[t7], %[t3] \n\t"
1801 
1802  ".set pop \n\t"
1803 
1804  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1805  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1806  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1807  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1808  : [in_int]"r"(in_int)
1809  : "memory"
1810  );
1811 
1812  curidx = 9 * qc1;
1813  curidx += qc2 + 40;
1814 
1815  curidx2 = 9 * qc3;
1816  curidx2 += qc4 + 40;
1817 
1818  curbits += p_bits[curidx];
1819  curbits += p_bits[curidx2];
1820 
1821  vec = &p_codes[curidx*2];
1822  vec2 = &p_codes[curidx2*2];
1823 
1824  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1825  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1826 
1827  __asm__ volatile (
1828  ".set push \n\t"
1829  ".set noreorder \n\t"
1830 
1831  "lwc1 $f0, 0(%[in_pos]) \n\t"
1832  "lwc1 $f1, 0(%[vec]) \n\t"
1833  "lwc1 $f2, 4(%[in_pos]) \n\t"
1834  "lwc1 $f3, 4(%[vec]) \n\t"
1835  "lwc1 $f4, 8(%[in_pos]) \n\t"
1836  "lwc1 $f5, 0(%[vec2]) \n\t"
1837  "lwc1 $f6, 12(%[in_pos]) \n\t"
1838  "lwc1 $f7, 4(%[vec2]) \n\t"
1839  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1840  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1841  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1842  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1843 
1844  ".set pop \n\t"
1845 
1846  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1847  [di2]"=&f"(di2), [di3]"=&f"(di3)
1848  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1849  [vec2]"r"(vec2), [IQ]"f"(IQ)
1850  : "$f0", "$f1", "$f2", "$f3",
1851  "$f4", "$f5", "$f6", "$f7",
1852  "memory"
1853  );
1854 
1855  cost += di0 * di0 + di1 * di1
1856  + di2 * di2 + di3 * di3;
1857  }
1858 
1859  if (bits)
1860  *bits = curbits;
1861  if (energy)
1862  *energy = qenergy * (IQ*IQ);
1863  return cost * lambda + curbits;
1864 }
1865 
1866 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1867  PutBitContext *pb, const float *in,
1868  const float *scaled, int size, int scale_idx,
1869  int cb, const float lambda, const float uplim,
1870  int *bits, float *energy)
1871 {
1872  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1873  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1874  int i;
1875  float cost = 0;
1876  float qenergy = 0.0f;
1877  int qc1, qc2, qc3, qc4;
1878  int curbits = 0;
1879 
1880  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1881  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1882 
1883  for (i = 0; i < size; i += 4) {
1884  const float *vec, *vec2;
1885  int curidx, curidx2, sign1, count1, sign2, count2;
1886  int *in_int = (int *)&in[i];
1887  float *in_pos = (float *)&in[i];
1888  float di0, di1, di2, di3;
1889  int t0, t1, t2, t3, t4;
1890 
1891  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1892  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1893  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1894  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1895 
1896  __asm__ volatile (
1897  ".set push \n\t"
1898  ".set noreorder \n\t"
1899 
1900  "ori %[t4], $zero, 7 \n\t"
1901  "ori %[sign1], $zero, 0 \n\t"
1902  "ori %[sign2], $zero, 0 \n\t"
1903  "slt %[t0], %[t4], %[qc1] \n\t"
1904  "slt %[t1], %[t4], %[qc2] \n\t"
1905  "slt %[t2], %[t4], %[qc3] \n\t"
1906  "slt %[t3], %[t4], %[qc4] \n\t"
1907  "movn %[qc1], %[t4], %[t0] \n\t"
1908  "movn %[qc2], %[t4], %[t1] \n\t"
1909  "movn %[qc3], %[t4], %[t2] \n\t"
1910  "movn %[qc4], %[t4], %[t3] \n\t"
1911  "lw %[t0], 0(%[in_int]) \n\t"
1912  "lw %[t1], 4(%[in_int]) \n\t"
1913  "lw %[t2], 8(%[in_int]) \n\t"
1914  "lw %[t3], 12(%[in_int]) \n\t"
1915  "slt %[t0], %[t0], $zero \n\t"
1916  "movn %[sign1], %[t0], %[qc1] \n\t"
1917  "slt %[t2], %[t2], $zero \n\t"
1918  "movn %[sign2], %[t2], %[qc3] \n\t"
1919  "slt %[t1], %[t1], $zero \n\t"
1920  "sll %[t0], %[sign1], 1 \n\t"
1921  "or %[t0], %[t0], %[t1] \n\t"
1922  "movn %[sign1], %[t0], %[qc2] \n\t"
1923  "slt %[t3], %[t3], $zero \n\t"
1924  "sll %[t0], %[sign2], 1 \n\t"
1925  "or %[t0], %[t0], %[t3] \n\t"
1926  "movn %[sign2], %[t0], %[qc4] \n\t"
1927  "slt %[count1], $zero, %[qc1] \n\t"
1928  "slt %[t1], $zero, %[qc2] \n\t"
1929  "slt %[count2], $zero, %[qc3] \n\t"
1930  "slt %[t2], $zero, %[qc4] \n\t"
1931  "addu %[count1], %[count1], %[t1] \n\t"
1932  "addu %[count2], %[count2], %[t2] \n\t"
1933 
1934  ".set pop \n\t"
1935 
1936  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1937  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1938  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1939  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1940  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1941  [t4]"=&r"(t4)
1942  : [in_int]"r"(in_int)
1943  : "memory"
1944  );
1945 
1946  curidx = 8 * qc1;
1947  curidx += qc2;
1948 
1949  curidx2 = 8 * qc3;
1950  curidx2 += qc4;
1951 
1952  curbits += p_bits[curidx];
1953  curbits += upair7_sign_bits[curidx];
1954  vec = &p_codes[curidx*2];
1955 
1956  curbits += p_bits[curidx2];
1957  curbits += upair7_sign_bits[curidx2];
1958  vec2 = &p_codes[curidx2*2];
1959 
1960  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1961  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1962 
1963  __asm__ volatile (
1964  ".set push \n\t"
1965  ".set noreorder \n\t"
1966 
1967  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1968  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1969  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1970  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1971  "abs.s %[di0], %[di0] \n\t"
1972  "abs.s %[di1], %[di1] \n\t"
1973  "abs.s %[di2], %[di2] \n\t"
1974  "abs.s %[di3], %[di3] \n\t"
1975  "lwc1 $f0, 0(%[vec]) \n\t"
1976  "lwc1 $f1, 4(%[vec]) \n\t"
1977  "lwc1 $f2, 0(%[vec2]) \n\t"
1978  "lwc1 $f3, 4(%[vec2]) \n\t"
1979  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1980  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1981  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1982  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1983 
1984  ".set pop \n\t"
1985 
1986  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1987  [di2]"=&f"(di2), [di3]"=&f"(di3)
1988  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1989  [vec2]"r"(vec2), [IQ]"f"(IQ)
1990  : "$f0", "$f1", "$f2", "$f3",
1991  "memory"
1992  );
1993 
1994  cost += di0 * di0 + di1 * di1
1995  + di2 * di2 + di3 * di3;
1996  }
1997 
1998  if (bits)
1999  *bits = curbits;
2000  if (energy)
2001  *energy = qenergy * (IQ*IQ);
2002  return cost * lambda + curbits;
2003 }
2004 
/**
 * Estimate the rate-distortion cost of coding one band with an unsigned-pair
 * codebook whose quantized magnitudes are clamped to 12 (codebook index is
 * 13 * first + second).
 *
 * Four coefficients (two codebook pairs) are handled per loop iteration, so
 * every iteration reads in[i..i+3] and scaled[i..i+3].
 *
 * @param s         not used by this function
 * @param pb        not used by this function
 * @param in        input coefficients; only their magnitudes enter the
 *                  distortion term
 * @param scaled    pre-scaled magnitudes that are multiplied by Q34 and
 *                  rounded to obtain the quantized values (callers in this
 *                  file pass abs_pow34_v() output)
 * @param size      number of coefficients to process
 * @param scale_idx scalefactor index used to select Q34 and IQ from the tables
 * @param cb        codebook number; the tables for cb-1 are used
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     not used by this function
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the sum of squared codebook values
 *                  multiplied by IQ*IQ
 * @return cost * lambda + curbits
 */
2005 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2006  PutBitContext *pb, const float *in,
2007  const float *scaled, int size, int scale_idx,
2008  int cb, const float lambda, const float uplim,
2009  int *bits, float *energy)
2010 {
2011  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2012  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2013  int i;
2014  float cost = 0;
2015  float qenergy = 0.0f;
2016  int qc1, qc2, qc3, qc4;
2017  int curbits = 0;
2018 
2019  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
2020  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
2021 
2022  for (i = 0; i < size; i += 4) {
2023  const float *vec, *vec2;
2024  int curidx, curidx2;
2025  int sign1, count1, sign2, count2;
2026  int *in_int = (int *)&in[i];
2027  float *in_pos = (float *)&in[i];
2028  float di0, di1, di2, di3;
2029  int t0, t1, t2, t3, t4;
2030 
/* Quantize four magnitudes; the float-to-int conversion truncates. */
2031  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2032  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2033  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2034  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2035 
/*
 * First part: clamp qc1..qc4 to at most 12 (slt sets a flag when 12 < qc,
 * movn then overwrites qc with 12).
 * Second part: derive sign1/sign2 and count1/count2 from the raw bit
 * patterns of in[i..i+3] and from the clamped values. Those four outputs
 * are not read again anywhere in this function; the sign bits are taken
 * from the upair12_sign_bits table below instead.
 */
2036  __asm__ volatile (
2037  ".set push \n\t"
2038  ".set noreorder \n\t"
2039 
2040  "ori %[t4], $zero, 12 \n\t"
2041  "ori %[sign1], $zero, 0 \n\t"
2042  "ori %[sign2], $zero, 0 \n\t"
2043  "slt %[t0], %[t4], %[qc1] \n\t"
2044  "slt %[t1], %[t4], %[qc2] \n\t"
2045  "slt %[t2], %[t4], %[qc3] \n\t"
2046  "slt %[t3], %[t4], %[qc4] \n\t"
2047  "movn %[qc1], %[t4], %[t0] \n\t"
2048  "movn %[qc2], %[t4], %[t1] \n\t"
2049  "movn %[qc3], %[t4], %[t2] \n\t"
2050  "movn %[qc4], %[t4], %[t3] \n\t"
2051  "lw %[t0], 0(%[in_int]) \n\t"
2052  "lw %[t1], 4(%[in_int]) \n\t"
2053  "lw %[t2], 8(%[in_int]) \n\t"
2054  "lw %[t3], 12(%[in_int]) \n\t"
2055  "slt %[t0], %[t0], $zero \n\t"
2056  "movn %[sign1], %[t0], %[qc1] \n\t"
2057  "slt %[t2], %[t2], $zero \n\t"
2058  "movn %[sign2], %[t2], %[qc3] \n\t"
2059  "slt %[t1], %[t1], $zero \n\t"
2060  "sll %[t0], %[sign1], 1 \n\t"
2061  "or %[t0], %[t0], %[t1] \n\t"
2062  "movn %[sign1], %[t0], %[qc2] \n\t"
2063  "slt %[t3], %[t3], $zero \n\t"
2064  "sll %[t0], %[sign2], 1 \n\t"
2065  "or %[t0], %[t0], %[t3] \n\t"
2066  "movn %[sign2], %[t0], %[qc4] \n\t"
2067  "slt %[count1], $zero, %[qc1] \n\t"
2068  "slt %[t1], $zero, %[qc2] \n\t"
2069  "slt %[count2], $zero, %[qc3] \n\t"
2070  "slt %[t2], $zero, %[qc4] \n\t"
2071  "addu %[count1], %[count1], %[t1] \n\t"
2072  "addu %[count2], %[count2], %[t2] \n\t"
2073 
2074  ".set pop \n\t"
2075 
2076  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2077  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2078  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2079  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2080  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2081  [t4]"=&r"(t4)
2082  : [in_int]"r"(in_int)
2083  : "memory"
2084  );
2085 
/* Each value lies in 0..12 after the clamp, hence the row stride of 13. */
2086  curidx = 13 * qc1;
2087  curidx += qc2;
2088 
2089  curidx2 = 13 * qc3;
2090  curidx2 += qc4;
2091 
/*
 * Codeword length plus the per-index entry of upair12_sign_bits (table
 * defined outside this chunk; presumably the number of sign bits for the
 * pair -- confirm at its definition).
 */
2092  curbits += p_bits[curidx];
2093  curbits += p_bits[curidx2];
2094  curbits += upair12_sign_bits[curidx];
2095  curbits += upair12_sign_bits[curidx2];
/* Codebook vectors are stored as consecutive pairs of floats. */
2096  vec = &p_codes[curidx*2];
2097  vec2 = &p_codes[curidx2*2];
2098 
2099  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2100  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
2101 
/*
 * Per-coefficient error: di_k = |in[i+k]| - codebook_value_k * IQ.
 * nmsub.s computes fr - fs*ft per the MIPS ISA definition. $f0-$f3 hold
 * the four codebook values and are listed as clobbers.
 */
2102  __asm__ volatile (
2103  ".set push \n\t"
2104  ".set noreorder \n\t"
2105 
2106  "lwc1 %[di0], 0(%[in_pos]) \n\t"
2107  "lwc1 %[di1], 4(%[in_pos]) \n\t"
2108  "lwc1 %[di2], 8(%[in_pos]) \n\t"
2109  "lwc1 %[di3], 12(%[in_pos]) \n\t"
2110  "abs.s %[di0], %[di0] \n\t"
2111  "abs.s %[di1], %[di1] \n\t"
2112  "abs.s %[di2], %[di2] \n\t"
2113  "abs.s %[di3], %[di3] \n\t"
2114  "lwc1 $f0, 0(%[vec]) \n\t"
2115  "lwc1 $f1, 4(%[vec]) \n\t"
2116  "lwc1 $f2, 0(%[vec2]) \n\t"
2117  "lwc1 $f3, 4(%[vec2]) \n\t"
2118  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2119  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2120  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2121  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2122 
2123  ".set pop \n\t"
2124 
2125  : [di0]"=&f"(di0), [di1]"=&f"(di1),
2126  [di2]"=&f"(di2), [di3]"=&f"(di3)
2127  : [in_pos]"r"(in_pos), [vec]"r"(vec),
2128  [vec2]"r"(vec2), [IQ]"f"(IQ)
2129  : "$f0", "$f1", "$f2", "$f3",
2130  "memory"
2131  );
2132 
/* Accumulate the squared error of the four coefficients. */
2133  cost += di0 * di0 + di1 * di1
2134  + di2 * di2 + di3 * di3;
2135  }
2136 
2137  if (bits)
2138  *bits = curbits;
/* qenergy was summed from unscaled codebook values, so scale it by IQ^2. */
2139  if (energy)
2140  *energy = qenergy * (IQ*IQ);
2141  return cost * lambda + curbits;
2142 }
2143 
2144 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2145  PutBitContext *pb, const float *in,
2146  const float *scaled, int size, int scale_idx,
2147  int cb, const float lambda, const float uplim,
2148  int *bits, float *energy)
2149 {
2150  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2151  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2152  const float CLIPPED_ESCAPE = 165140.0f * IQ;
2153  int i;
2154  float cost = 0;
2155  float qenergy = 0.0f;
2156  int qc1, qc2, qc3, qc4;
2157  int curbits = 0;
2158 
2159  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2160  float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2161 
2162  for (i = 0; i < size; i += 4) {
2163  const float *vec, *vec2;
2164  int curidx, curidx2;
2165  float t1, t2, t3, t4, V;
2166  float di1, di2, di3, di4;
2167  int cond0, cond1, cond2, cond3;
2168  int c1, c2, c3, c4;
2169  int t6, t7;
2170 
2171  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2172  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2173  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2174  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2175 
2176  __asm__ volatile (
2177  ".set push \n\t"
2178  ".set noreorder \n\t"
2179 
2180  "ori %[t6], $zero, 15 \n\t"
2181  "ori %[t7], $zero, 16 \n\t"
2182  "shll_s.w %[c1], %[qc1], 18 \n\t"
2183  "shll_s.w %[c2], %[qc2], 18 \n\t"
2184  "shll_s.w %[c3], %[qc3], 18 \n\t"
2185  "shll_s.w %[c4], %[qc4], 18 \n\t"
2186  "srl %[c1], %[c1], 18 \n\t"
2187  "srl %[c2], %[c2], 18 \n\t"
2188  "srl %[c3], %[c3], 18 \n\t"
2189  "srl %[c4], %[c4], 18 \n\t"
2190  "slt %[cond0], %[t6], %[qc1] \n\t"
2191  "slt %[cond1], %[t6], %[qc2] \n\t"
2192  "slt %[cond2], %[t6], %[qc3] \n\t"
2193  "slt %[cond3], %[t6], %[qc4] \n\t"
2194  "movn %[qc1], %[t7], %[cond0] \n\t"
2195  "movn %[qc2], %[t7], %[cond1] \n\t"
2196  "movn %[qc3], %[t7], %[cond2] \n\t"
2197  "movn %[qc4], %[t7], %[cond3] \n\t"
2198 
2199  ".set pop \n\t"
2200 
2201  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2202  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2203  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2204  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2205  [c1]"=&r"(c1), [c2]"=&r"(c2),
2206  [c3]"=&r"(c3), [c4]"=&r"(c4),
2207  [t6]"=&r"(t6), [t7]"=&r"(t7)
2208  );
2209 
2210  curidx = 17 * qc1;
2211  curidx += qc2;
2212 
2213  curidx2 = 17 * qc3;
2214  curidx2 += qc4;
2215 
2216  curbits += p_bits[curidx];
2217  curbits += esc_sign_bits[curidx];
2218  vec = &p_codes[curidx*2];
2219 
2220  curbits += p_bits[curidx2];
2221  curbits += esc_sign_bits[curidx2];
2222  vec2 = &p_codes[curidx2*2];
2223 
2224  curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2225  curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2226  curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2227  curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2228 
2229  t1 = fabsf(in[i ]);
2230  t2 = fabsf(in[i+1]);
2231  t3 = fabsf(in[i+2]);
2232  t4 = fabsf(in[i+3]);
2233 
2234  if (cond0) {
2235  if (t1 >= CLIPPED_ESCAPE) {
2236  di1 = t1 - CLIPPED_ESCAPE;
2237  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2238  } else {
2239  di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2240  qenergy += V*V;
2241  }
2242  } else {
2243  di1 = t1 - (V = vec[0] * IQ);
2244  qenergy += V*V;
2245  }
2246 
2247  if (cond1) {
2248  if (t2 >= CLIPPED_ESCAPE) {
2249  di2 = t2 - CLIPPED_ESCAPE;
2250  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2251  } else {
2252  di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2253  qenergy += V*V;
2254  }
2255  } else {
2256  di2 = t2 - (V = vec[1] * IQ);
2257  qenergy += V*V;
2258  }
2259 
2260  if (cond2) {
2261  if (t3 >= CLIPPED_ESCAPE) {
2262  di3 = t3 - CLIPPED_ESCAPE;
2263  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2264  } else {
2265  di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2266  qenergy += V*V;
2267  }
2268  } else {
2269  di3 = t3 - (V = vec2[0] * IQ);
2270  qenergy += V*V;
2271  }
2272 
2273  if (cond3) {
2274  if (t4 >= CLIPPED_ESCAPE) {
2275  di4 = t4 - CLIPPED_ESCAPE;
2276  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2277  } else {
2278  di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2279  qenergy += V*V;
2280  }
2281  } else {
2282  di4 = t4 - (V = vec2[1]*IQ);
2283  qenergy += V*V;
2284  }
2285 
2286  cost += di1 * di1 + di2 * di2
2287  + di3 * di3 + di4 * di4;
2288  }
2289 
2290  if (bits)
2291  *bits = curbits;
2292  return cost * lambda + curbits;
2293 }
2294 
2295 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2296  PutBitContext *pb, const float *in,
2297  const float *scaled, int size, int scale_idx,
2298  int cb, const float lambda, const float uplim,
2299  int *bits, float *energy) = {
2300  get_band_cost_ZERO_mips,
2301  get_band_cost_SQUAD_mips,
2302  get_band_cost_SQUAD_mips,
2303  get_band_cost_UQUAD_mips,
2304  get_band_cost_UQUAD_mips,
2305  get_band_cost_SPAIR_mips,
2306  get_band_cost_SPAIR_mips,
2307  get_band_cost_UPAIR7_mips,
2308  get_band_cost_UPAIR7_mips,
2309  get_band_cost_UPAIR12_mips,
2310  get_band_cost_UPAIR12_mips,
2311  get_band_cost_ESC_mips,
2312  get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2313  get_band_cost_ZERO_mips,
2314  get_band_cost_ZERO_mips,
2315  get_band_cost_ZERO_mips,
2316 };
2317 
/** Forward a band cost query to the estimator registered for codebook cb. */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb,                 \
                      lambda, uplim, bits, energy)                            \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb,             \
                          lambda, uplim, bits, energy)
2324 
/**
 * Return the cost of coding a band with codebook cb by dispatching through
 * get_band_cost() with a NULL bit writer.
 *
 * The rtz argument is accepted but not used here. All other arguments are
 * forwarded unchanged.
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits, float *energy, int rtz)
{
    const float band_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx,
                                          cb, lambda, uplim, bits, energy);

    return band_cost;
}
2332 
2334 
2336 
2337 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2338 {
2339  int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
2340  uint8_t nextband0[128], nextband1[128];
2341  float M[128], S[128];
2342  float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2343  const float lambda = s->lambda;
2344  const float mslambda = FFMIN(1.0f, lambda / 120.f);
2345  SingleChannelElement *sce0 = &cpe->ch[0];
2346  SingleChannelElement *sce1 = &cpe->ch[1];
2347  if (!cpe->common_window)
2348  return;
2349 
2350  /** Scout out next nonzero bands */
2351  ff_init_nextband_map(sce0, nextband0);
2352  ff_init_nextband_map(sce1, nextband1);
2353 
2354  prev_mid = sce0->sf_idx[0];
2355  prev_side = sce1->sf_idx[0];
2356  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2357  start = 0;
2358  for (g = 0; g < sce0->ics.num_swb; g++) {
2359  float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
2360  if (!cpe->is_mask[w*16+g])
2361  cpe->ms_mask[w*16+g] = 0;
2362  if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
2363  float Mmax = 0.0f, Smax = 0.0f;
2364 
2365  /* Must compute mid/side SF and book for the whole window group */
2366  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2367  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2368  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2369  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2370  S[i] = M[i]
2371  - sce1->coeffs[start+(w+w2)*128+i];
2372  }
2373  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2374  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2375  for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
2376  Mmax = FFMAX(Mmax, M34[i]);
2377  Smax = FFMAX(Smax, S34[i]);
2378  }
2379  }
2380 
2381  for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2382  float dist1 = 0.0f, dist2 = 0.0f;
2383  int B0 = 0, B1 = 0;
2384  int minidx;
2385  int mididx, sididx;
2386  int midcb, sidcb;
2387 
2388  minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
2389  mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
2390  sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
2391  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
2392  && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
2393  || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
2394  /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
2395  continue;
2396  }
2397 
2398  midcb = find_min_book(Mmax, mididx);
2399  sidcb = find_min_book(Smax, sididx);
2400 
2401  /* No CB can be zero */
2402  midcb = FFMAX(1,midcb);
2403  sidcb = FFMAX(1,sidcb);
2404 
2405  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2406  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2407  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2408  float minthr = FFMIN(band0->threshold, band1->threshold);
2409  int b1,b2,b3,b4;
2410  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2411  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2412  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2413  S[i] = M[i]
2414  - sce1->coeffs[start+(w+w2)*128+i];
2415  }
2416 
2417  abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2418  abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2419  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2420  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2421  dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2422  L34,
2423  sce0->ics.swb_sizes[g],
2424  sce0->sf_idx[w*16+g],
2425  sce0->band_type[w*16+g],
2426  lambda / band0->threshold, INFINITY, &b1, NULL, 0);
2427  dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2428  R34,
2429  sce1->ics.swb_sizes[g],
2430  sce1->sf_idx[w*16+g],
2431  sce1->band_type[w*16+g],
2432  lambda / band1->threshold, INFINITY, &b2, NULL, 0);
2433  dist2 += quantize_band_cost(s, M,
2434  M34,
2435  sce0->ics.swb_sizes[g],
2436  mididx,
2437  midcb,
2438  lambda / minthr, INFINITY, &b3, NULL, 0);
2439  dist2 += quantize_band_cost(s, S,
2440  S34,
2441  sce1->ics.swb_sizes[g],
2442  sididx,
2443  sidcb,
2444  mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
2445  B0 += b1+b2;
2446  B1 += b3+b4;
2447  dist1 -= b1+b2;
2448  dist2 -= b3+b4;
2449  }
2450  cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
2451  if (cpe->ms_mask[w*16+g]) {
2452  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
2453  sce0->sf_idx[w*16+g] = mididx;
2454  sce1->sf_idx[w*16+g] = sididx;
2455  sce0->band_type[w*16+g] = midcb;
2456  sce1->band_type[w*16+g] = sidcb;
2457  } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
2458  /* ms_mask unneeded, and it confuses some decoders */
2459  cpe->ms_mask[w*16+g] = 0;
2460  }
2461  break;
2462  } else if (B1 > B0) {
2463  /* More boost won't fix this */
2464  break;
2465  }
2466  }
2467  }
2468  if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
2469  prev_mid = sce0->sf_idx[w*16+g];
2470  if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
2471  prev_side = sce1->sf_idx[w*16+g];
2472  start += sce0->ics.swb_sizes[g];
2473  }
2474  }
2475 }
2476 #endif /*HAVE_MIPSFPU */
2477 
2479 
2480 #endif /* HAVE_INLINE_ASM */
2481 
2483 #if HAVE_INLINE_ASM
2484  AACCoefficientsEncoder *e = c->coder;
2485  int option = c->options.coder;
2486 
2487  if (option == 2) {
2488  e->quantize_and_encode_band = quantize_and_encode_band_mips;
2490 #if HAVE_MIPSFPU
2492 #endif /* HAVE_MIPSFPU */
2493  }
2494 #if HAVE_MIPSFPU
2495  e->search_for_ms = search_for_ms_mips;
2496 #endif /* HAVE_MIPSFPU */
2497 #endif /* HAVE_INLINE_ASM */
2498 }
#define NULL
Definition: coverity.c:32
const char * s
Definition: avisynth_c.h:631
Band types following are encoded differently from others.
Definition: aac.h:86
int coder
Definition: aacenc.h:45
AAC encoder trellis codebook selector.
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:39
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:168
const char * g
Definition: vf_curves.c:108
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
Definition: psymodel.h:61
#define SCALE_MAX_POS
scalefactor index maximum value
Definition: aac.h:150
int av_log2(unsigned v)
Definition: intmath.c:26
AACCoefficientsEncoder * coder
Definition: aacenc.h:115
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
Definition: aacenc_utils.h:185
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:278
int prev_idx
pointer to the previous path point
Definition: aaccoder.c:69
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:281
float lambda
Definition: aacenc.h:119
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:36
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
#define B1
Definition: faandct.c:41
#define t7
Definition: regdef.h:35
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
Definition: aacenc_utils.h:243
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:97
AACEncOptions options
encoding options
Definition: aacenc.h:98
#define M(a, b)
Definition: vp3dsp.c:44
AAC encoder context.
Definition: aacenc.h:96
uint8_t bits
Definition: crc.c:295
uint8_t
SingleChannelElement ch[2]
Definition: aac.h:284
#define t0
Definition: regdef.h:28
void ff_aac_coder_init_mips(AACEncContext *c)
static const uint64_t c1
Definition: murmur3.c:49
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
single band psychoacoustic information
Definition: psymodel.h:50
ptrdiff_t size
Definition: opengl_enc.c:101
#define S(s, c, i)
#define t1
Definition: regdef.h:29
#define t3
Definition: regdef.h:31
GLsizei count
Definition: opengl_enc.c:109
int num_swb
number of scalefactor window bands
Definition: aac.h:183
#define FFMAX(a, b)
Definition: common.h:94
float cost
path cost
Definition: aaccoder.c:70
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:57
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:908
float ff_aac_pow2sf_tab[428]
Definition: aactab.c:35
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
float ff_aac_pow34sf_tab[428]
Definition: aactab.c:36
int cur_channel
current channel for coder context
Definition: aacenc.h:116
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:412
#define FFMIN(a, b)
Definition: common.h:96
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:196
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:61
AAC definitions and structures.
AAC encoder twoloop coder.
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
PutBitContext pb
Definition: aacenc.h:99
#define ROUND_STANDARD
Definition: aacenc_utils.h:35
#define INFINITY
Definition: math.h:27
FILE * out
Definition: movenc-test.c:54
Libavcodec external API header.
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:77
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:91
IndividualChannelStream ics
Definition: aac.h:249
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:59
static av_always_inline float cbrtf(float x)
Definition: libm.h:61
structure used in optimal codebook search
Definition: aaccoder.c:68
uint8_t group_len[8]
Definition: aac.h:179
Replacements for frequently missing libm functions.
option
Definition: libkvazaar.c:278
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:182
#define t5
Definition: regdef.h:33
FFPsyContext psy
Definition: aacenc.h:113
AAC encoder data.
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:257
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:256
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:262
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
AAC encoder utilities.
#define t6
Definition: regdef.h:34
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:248
static double c[64]
ChannelElement * cpe
channel elements
Definition: aacenc.h:112
static const uint64_t c2
Definition: murmur3.c:50
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:275
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:407
#define t4
Definition: regdef.h:32
int len
FFPsyChannel * ch
single channel information
Definition: psymodel.h:93
enum BandType band_type[128]
band types
Definition: aac.h:252
AAC encoder quantization misc reusable function templates.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:154
void INT64 start
Definition: avisynth_c.h:553
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:282
float threshold
Definition: psymodel.h:53
AAC data declarations.
float scoefs[1024]
scaled coefficients
Definition: aacenc.h:127
#define B0
Definition: faandct.c:40
#define t2
Definition: regdef.h:30
#define V
Definition: avdct.c:30
bitstream writer API