/*
 * Lookup table with 81 entries, written as 9 rows of 9.
 * Each entry equals the number of non-zero digits of its index when the
 * index is written as four base-3 digits (index 0 -> 0, index 40 -> 4).
 * The UQUAD bit-counting code in this file adds uquad_sign_bits[curidx]
 * to its running bit total right after p_bits[curidx].
 */
75 static const uint8_t uquad_sign_bits[81] = {
76 0, 1, 1, 1, 2, 2, 1, 2, 2,
77 1, 2, 2, 2, 3, 3, 2, 3, 3,
78 1, 2, 2, 2, 3, 3, 2, 3, 3,
79 1, 2, 2, 2, 3, 3, 2, 3, 3,
80 2, 3, 3, 3, 4, 4, 3, 4, 4,
81 2, 3, 3, 3, 4, 4, 3, 4, 4,
82 1, 2, 2, 2, 3, 3, 2, 3, 3,
83 2, 3, 3, 3, 4, 4, 3, 4, 4,
84 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Lookup table with 64 entries, written as 8 rows of 8.
 * Entry [row * 8 + col] is (row != 0) + (col != 0): 0 for index 0, 1 along
 * the first row and the first column, 2 everywhere else.
 * The UPAIR7 bit-counting code in this file adds the entries for both of
 * its indices (curidx and curidx2) to the running bit total.
 */
87 static const uint8_t upair7_sign_bits[64] = {
88 0, 1, 1, 1, 1, 1, 1, 1,
89 1, 2, 2, 2, 2, 2, 2, 2,
90 1, 2, 2, 2, 2, 2, 2, 2,
91 1, 2, 2, 2, 2, 2, 2, 2,
92 1, 2, 2, 2, 2, 2, 2, 2,
93 1, 2, 2, 2, 2, 2, 2, 2,
94 1, 2, 2, 2, 2, 2, 2, 2,
95 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Lookup table with 169 entries, written as 13 rows of 13.
 * Entry [row * 13 + col] is (row != 0) + (col != 0): 0 for index 0, 1 along
 * the first row and the first column, 2 everywhere else.
 * The UPAIR12 bit-counting code in this file adds the entries for both of
 * its indices (curidx and curidx2) to the running bit total.
 */
98 static const uint8_t upair12_sign_bits[169] = {
99 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Lookup table with 289 entries, written as 17 rows of 17.
 * Entry [row * 17 + col] is (row != 0) + (col != 0): 0 for index 0, 1 along
 * the first row and the first column, 2 everywhere else.
 * The ESC bit-counting code in this file adds the entries for both of its
 * indices (curidx and curidx2) to the running bit total.
 */
114 static const uint8_t esc_sign_bits[289] = {
115 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 1 and 2.
 *
 * Walks the band four coefficients at a time (i += 4).  For each group the
 * four integer levels qc1..qc4 are reduced to -1/0/+1 by the inline
 * assembly, one codeword is written with put_bits(), and the squares of
 * the four reconstructed values e1..e4 are added to qenergy.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
137 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
139 const float *scaled,
int size,
int scale_idx,
140 int cb,
const float lambda,
const float uplim,
141 int *
bits,
float *energy,
const float ROUNDING)
146 int qc1, qc2, qc3, qc4;
147 float qenergy = 0.0f;
155 for (i = 0; i <
size; i += 4) {
/* integer view of in[i..i+3]; only handed to the asm as a load address */
157 int *in_int = (
int *)&in[i];
168 ".set noreorder \n\t"
/* qcN = (0 < qcN): every positive level becomes 1, everything else 0 */
170 "slt %[qc1], $zero, %[qc1] \n\t"
171 "slt %[qc2], $zero, %[qc2] \n\t"
172 "slt %[qc3], $zero, %[qc3] \n\t"
173 "slt %[qc4], $zero, %[qc4] \n\t"
/* fetch the four input elements as raw 32-bit words */
174 "lw %[t0], 0(%[in_int]) \n\t"
175 "lw %[t1], 4(%[in_int]) \n\t"
176 "lw %[t2], 8(%[in_int]) \n\t"
177 "lw %[t3], 12(%[in_int]) \n\t"
/* keep only bit 31 of each word (the sign bit if in[] holds IEEE-754
 * floats -- the declaration of in is not visible here) */
178 "srl %[t0], %[t0], 31 \n\t"
179 "srl %[t1], %[t1], 31 \n\t"
180 "srl %[t2], %[t2], 31 \n\t"
181 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc1..-qc4 */
182 "subu %[t4], $zero, %[qc1] \n\t"
183 "subu %[t5], $zero, %[qc2] \n\t"
184 "subu %[t6], $zero, %[qc3] \n\t"
185 "subu %[t7], $zero, %[qc4] \n\t"
/* where bit 31 of the input was set, replace qcN by its negation */
186 "movn %[qc1], %[t4], %[t0] \n\t"
187 "movn %[qc2], %[t5], %[t1] \n\t"
188 "movn %[qc3], %[t6], %[t2] \n\t"
189 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read and written; t0..t7 are early-clobber scratch outputs */
193 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
194 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
195 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
196 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
197 : [in_int]
"r"(in_int)
/* write the codeword for curidx; presumably put_bits(ctx, bit_count, value)
 * -- confirm the argument order against the put_bits declaration */
210 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/* four consecutive table values belong to each index */
214 vec = &p_vec[curidx*4];
226 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 3 and 4.
 *
 * Walks the band four coefficients at a time.  The inline assembly limits
 * each level qc1..qc4 to at most 2, packs one sign flag per non-zero level
 * into `sign`, and counts the positive levels in `count`.  The codeword is
 * then the table code shifted left by `count` with the low `count` bits of
 * `sign` appended; its length is p_bits[curidx] + count.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
233 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *s,
235 const float *scaled,
int size,
int scale_idx,
236 int cb,
const float lambda,
const float uplim,
237 int *bits,
float *energy,
const float ROUNDING)
242 int qc1, qc2, qc3, qc4;
243 float qenergy = 0.0f;
251 for (i = 0; i <
size; i += 4) {
252 int curidx, sign,
count;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
253 int *in_int = (
int *)&in[i];
255 unsigned int v_codes;
266 ".set noreorder \n\t"
/* t4 = 2 (upper limit), sign = 0 */
268 "ori %[t4], $zero, 2 \n\t"
269 "ori %[sign], $zero, 0 \n\t"
/* tN = (2 < qcN); where true, qcN is replaced by 2 */
270 "slt %[t0], %[t4], %[qc1] \n\t"
271 "slt %[t1], %[t4], %[qc2] \n\t"
272 "slt %[t2], %[t4], %[qc3] \n\t"
273 "slt %[t3], %[t4], %[qc4] \n\t"
274 "movn %[qc1], %[t4], %[t0] \n\t"
275 "movn %[qc2], %[t4], %[t1] \n\t"
276 "movn %[qc3], %[t4], %[t2] \n\t"
277 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words */
278 "lw %[t0], 0(%[in_int]) \n\t"
279 "lw %[t1], 4(%[in_int]) \n\t"
280 "lw %[t2], 8(%[in_int]) \n\t"
281 "lw %[t3], 12(%[in_int]) \n\t"
/* tN = (word < 0 as a signed int), i.e. 1 when bit 31 is set.
 * sign takes the flag of element 0 only if qc1 is non-zero. */
282 "slt %[t0], %[t0], $zero \n\t"
283 "movn %[sign], %[t0], %[qc1] \n\t"
284 "slt %[t1], %[t1], $zero \n\t"
285 "slt %[t2], %[t2], $zero \n\t"
286 "slt %[t3], %[t3], $zero \n\t"
/* sign = (sign << 1) | flag1, committed only if qc2 is non-zero */
287 "sll %[t0], %[sign], 1 \n\t"
288 "or %[t0], %[t0], %[t1] \n\t"
289 "movn %[sign], %[t0], %[qc2] \n\t"
/* start counting positive levels: t4, t1 and count hold (0 < qc1..qc3) */
290 "slt %[t4], $zero, %[qc1] \n\t"
291 "slt %[t1], $zero, %[qc2] \n\t"
292 "slt %[count], $zero, %[qc3] \n\t"
/* sign = (sign << 1) | flag2, committed only if qc3 is non-zero */
293 "sll %[t0], %[sign], 1 \n\t"
294 "or %[t0], %[t0], %[t2] \n\t"
295 "movn %[sign], %[t0], %[qc3] \n\t"
296 "slt %[t2], $zero, %[qc4] \n\t"
297 "addu %[count], %[count], %[t4] \n\t"
298 "addu %[count], %[count], %[t1] \n\t"
/* sign = (sign << 1) | flag3, committed only if qc4 is non-zero */
299 "sll %[t0], %[sign], 1 \n\t"
300 "or %[t0], %[t0], %[t3] \n\t"
301 "movn %[sign], %[t0], %[qc4] \n\t"
/* count = number of the four levels that are greater than zero */
302 "addu %[count], %[count], %[t2] \n\t"
306 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
307 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
308 [sign]
"=&r"(sign), [count]
"=&r"(count),
309 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
311 : [in_int]
"r"(in_int)
/* table code followed by the `count` packed sign flags */
323 v_codes = (p_codes[curidx] <<
count) | (sign & ((1 << count) - 1));
324 v_bits = p_bits[curidx] +
count;
/* four consecutive table values belong to each index */
329 vec = &p_vec[curidx*4];
/* reconstructed value: table magnitude times IQ, with the sign of the input */
330 e1 = copysignf(vec[0] * IQ, in[i+0]);
331 e2 = copysignf(vec[1] * IQ, in[i+1]);
332 e3 = copysignf(vec[2] * IQ, in[i+2]);
333 e4 = copysignf(vec[3] * IQ, in[i+3]);
341 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 5 and 6.
 *
 * Walks the band four coefficients at a time.  The inline assembly limits
 * each level qc1..qc4 to at most 4 and then negates it where bit 31 of the
 * matching input word is set.  Two table indices (curidx, curidx2) are
 * used per group and their two codewords are concatenated into one
 * v_codes / v_bits pair.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
348 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *s,
350 const float *scaled,
int size,
int scale_idx,
351 int cb,
const float lambda,
const float uplim,
352 int *bits,
float *energy,
const float ROUNDING)
357 int qc1, qc2, qc3, qc4;
358 float qenergy = 0.0f;
366 for (i = 0; i <
size; i += 4) {
/* integer view of in[i..i+3]; only handed to the asm as a load address */
368 int *in_int = (
int *)&in[i];
370 unsigned int v_codes;
372 const float *vec1, *vec2;
381 ".set noreorder \n\t"
/* t4 = 4 (upper limit); tN = (4 < qcN); where true, qcN becomes 4 */
383 "ori %[t4], $zero, 4 \n\t"
384 "slt %[t0], %[t4], %[qc1] \n\t"
385 "slt %[t1], %[t4], %[qc2] \n\t"
386 "slt %[t2], %[t4], %[qc3] \n\t"
387 "slt %[t3], %[t4], %[qc4] \n\t"
388 "movn %[qc1], %[t4], %[t0] \n\t"
389 "movn %[qc2], %[t4], %[t1] \n\t"
390 "movn %[qc3], %[t4], %[t2] \n\t"
391 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words, keep bit 31 only */
392 "lw %[t0], 0(%[in_int]) \n\t"
393 "lw %[t1], 4(%[in_int]) \n\t"
394 "lw %[t2], 8(%[in_int]) \n\t"
395 "lw %[t3], 12(%[in_int]) \n\t"
396 "srl %[t0], %[t0], 31 \n\t"
397 "srl %[t1], %[t1], 31 \n\t"
398 "srl %[t2], %[t2], 31 \n\t"
399 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc1..-qc4 (t4 is reused; the limit value is no longer needed) */
400 "subu %[t4], $zero, %[qc1] \n\t"
401 "subu %[t5], $zero, %[qc2] \n\t"
402 "subu %[t6], $zero, %[qc3] \n\t"
403 "subu %[t7], $zero, %[qc4] \n\t"
/* where bit 31 of the input was set, replace qcN by its negation */
404 "movn %[qc1], %[t4], %[t0] \n\t"
405 "movn %[qc2], %[t5], %[t1] \n\t"
406 "movn %[qc3], %[t6], %[t2] \n\t"
407 "movn %[qc4], %[t7], %[t3] \n\t"
411 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
412 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
413 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
414 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
415 : [in_int]
"r"(in_int)
/* first codeword in the high bits, second codeword in the low bits */
425 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
426 v_bits = p_bits[curidx] + p_bits[curidx2];
/* two consecutive table values belong to each index */
431 vec1 = &p_vec[curidx*2 ];
432 vec2 = &p_vec[curidx2*2];
444 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 7 and 8.
 *
 * Walks the band four coefficients at a time, treating them as two pairs.
 * The inline assembly limits each level qc1..qc4 to at most 7, packs the
 * sign flags of the non-zero levels of each pair into sign1 / sign2, and
 * counts the positive levels of each pair in count1 / count2.  Each pair
 * yields a codeword made of the table code shifted left by its count with
 * its packed sign flags in the low bits.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
451 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *s,
453 const float *scaled,
int size,
int scale_idx,
454 int cb,
const float lambda,
const float uplim,
455 int *bits,
float *energy,
const float ROUNDING)
460 int qc1, qc2, qc3, qc4;
461 float qenergy = 0.0f;
469 for (i = 0; i <
size; i += 4) {
470 int curidx1, curidx2, sign1, count1, sign2, count2;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
471 int *in_int = (
int *)&in[i];
473 unsigned int v_codes;
475 const float *vec1, *vec2;
484 ".set noreorder \n\t"
/* t4 = 7 (upper limit), sign1 = sign2 = 0 */
486 "ori %[t4], $zero, 7 \n\t"
487 "ori %[sign1], $zero, 0 \n\t"
488 "ori %[sign2], $zero, 0 \n\t"
/* tN = (7 < qcN); where true, qcN is replaced by 7 */
489 "slt %[t0], %[t4], %[qc1] \n\t"
490 "slt %[t1], %[t4], %[qc2] \n\t"
491 "slt %[t2], %[t4], %[qc3] \n\t"
492 "slt %[t3], %[t4], %[qc4] \n\t"
493 "movn %[qc1], %[t4], %[t0] \n\t"
494 "movn %[qc2], %[t4], %[t1] \n\t"
495 "movn %[qc3], %[t4], %[t2] \n\t"
496 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words */
497 "lw %[t0], 0(%[in_int]) \n\t"
498 "lw %[t1], 4(%[in_int]) \n\t"
499 "lw %[t2], 8(%[in_int]) \n\t"
500 "lw %[t3], 12(%[in_int]) \n\t"
/* flag = (word < 0 as a signed int); the first element of each pair
 * seeds sign1 / sign2 when its level is non-zero */
501 "slt %[t0], %[t0], $zero \n\t"
502 "movn %[sign1], %[t0], %[qc1] \n\t"
503 "slt %[t2], %[t2], $zero \n\t"
504 "movn %[sign2], %[t2], %[qc3] \n\t"
/* sign1 = (sign1 << 1) | flag1, committed only if qc2 is non-zero */
505 "slt %[t1], %[t1], $zero \n\t"
506 "sll %[t0], %[sign1], 1 \n\t"
507 "or %[t0], %[t0], %[t1] \n\t"
508 "movn %[sign1], %[t0], %[qc2] \n\t"
/* sign2 = (sign2 << 1) | flag3, committed only if qc4 is non-zero */
509 "slt %[t3], %[t3], $zero \n\t"
510 "sll %[t0], %[sign2], 1 \n\t"
511 "or %[t0], %[t0], %[t3] \n\t"
512 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
513 "slt %[count1], $zero, %[qc1] \n\t"
514 "slt %[t1], $zero, %[qc2] \n\t"
515 "slt %[count2], $zero, %[qc3] \n\t"
516 "slt %[t2], $zero, %[qc4] \n\t"
517 "addu %[count1], %[count1], %[t1] \n\t"
518 "addu %[count2], %[count2], %[t2] \n\t"
522 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
523 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
524 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
525 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
526 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
528 : [in_int]
"r"(in_int)
/* NOTE(review): the clobber list below names "t0".."t4".  In a clobber
 * list these are register names, not the %[t0]..%[t4] operands bound
 * above -- confirm that clobbering those registers is intended. */
529 :
"t0",
"t1",
"t2",
"t3",
"t4",
/* first pair: table code followed by its count1 packed sign flags */
536 v_codes = (p_codes[curidx1] << count1) | sign1;
537 v_bits = p_bits[curidx1] + count1;
/* second pair: table code followed by its count2 packed sign flags */
543 v_codes = (p_codes[curidx2] << count2) | sign2;
544 v_bits = p_bits[curidx2] + count2;
/* two consecutive table values belong to each index */
549 vec1 = &p_vec[curidx1*2];
550 vec2 = &p_vec[curidx2*2];
/* reconstructed value: table magnitude times IQ, with the sign of the input */
551 e1 = copysignf(vec1[0] * IQ, in[i+0]);
552 e2 = copysignf(vec1[1] * IQ, in[i+1]);
553 e3 = copysignf(vec2[0] * IQ, in[i+2]);
554 e4 = copysignf(vec2[1] * IQ, in[i+3]);
562 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 9 and 10.
 *
 * Walks the band four coefficients at a time, treating them as two pairs.
 * The inline assembly limits each level qc1..qc4 to at most 12, packs the
 * sign flags of the non-zero levels of each pair into sign1 / sign2, and
 * counts the positive levels of each pair in count1 / count2.  Each pair
 * yields a codeword made of the table code shifted left by its count with
 * its packed sign flags in the low bits.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
569 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *s,
571 const float *scaled,
int size,
int scale_idx,
572 int cb,
const float lambda,
const float uplim,
573 int *bits,
float *energy,
const float ROUNDING)
578 int qc1, qc2, qc3, qc4;
579 float qenergy = 0.0f;
587 for (i = 0; i <
size; i += 4) {
588 int curidx1, curidx2, sign1, count1, sign2, count2;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
589 int *in_int = (
int *)&in[i];
591 unsigned int v_codes;
593 const float *vec1, *vec2;
602 ".set noreorder \n\t"
/* t4 = 12 (upper limit), sign1 = sign2 = 0 */
604 "ori %[t4], $zero, 12 \n\t"
605 "ori %[sign1], $zero, 0 \n\t"
606 "ori %[sign2], $zero, 0 \n\t"
/* tN = (12 < qcN); where true, qcN is replaced by 12 */
607 "slt %[t0], %[t4], %[qc1] \n\t"
608 "slt %[t1], %[t4], %[qc2] \n\t"
609 "slt %[t2], %[t4], %[qc3] \n\t"
610 "slt %[t3], %[t4], %[qc4] \n\t"
611 "movn %[qc1], %[t4], %[t0] \n\t"
612 "movn %[qc2], %[t4], %[t1] \n\t"
613 "movn %[qc3], %[t4], %[t2] \n\t"
614 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words */
615 "lw %[t0], 0(%[in_int]) \n\t"
616 "lw %[t1], 4(%[in_int]) \n\t"
617 "lw %[t2], 8(%[in_int]) \n\t"
618 "lw %[t3], 12(%[in_int]) \n\t"
/* flag = (word < 0 as a signed int); the first element of each pair
 * seeds sign1 / sign2 when its level is non-zero */
619 "slt %[t0], %[t0], $zero \n\t"
620 "movn %[sign1], %[t0], %[qc1] \n\t"
621 "slt %[t2], %[t2], $zero \n\t"
622 "movn %[sign2], %[t2], %[qc3] \n\t"
/* sign1 = (sign1 << 1) | flag1, committed only if qc2 is non-zero */
623 "slt %[t1], %[t1], $zero \n\t"
624 "sll %[t0], %[sign1], 1 \n\t"
625 "or %[t0], %[t0], %[t1] \n\t"
626 "movn %[sign1], %[t0], %[qc2] \n\t"
/* sign2 = (sign2 << 1) | flag3, committed only if qc4 is non-zero */
627 "slt %[t3], %[t3], $zero \n\t"
628 "sll %[t0], %[sign2], 1 \n\t"
629 "or %[t0], %[t0], %[t3] \n\t"
630 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
631 "slt %[count1], $zero, %[qc1] \n\t"
632 "slt %[t1], $zero, %[qc2] \n\t"
633 "slt %[count2], $zero, %[qc3] \n\t"
634 "slt %[t2], $zero, %[qc4] \n\t"
635 "addu %[count1], %[count1], %[t1] \n\t"
636 "addu %[count2], %[count2], %[t2] \n\t"
640 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
641 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
642 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
643 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
644 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
646 : [in_int]
"r"(in_int)
/* first pair: table code followed by its count1 packed sign flags */
653 v_codes = (p_codes[curidx1] << count1) | sign1;
654 v_bits = p_bits[curidx1] + count1;
/* second pair: table code followed by its count2 packed sign flags */
660 v_codes = (p_codes[curidx2] << count2) | sign2;
661 v_bits = p_bits[curidx2] + count2;
/* two consecutive table values belong to each index */
666 vec1 = &p_vec[curidx1*2];
667 vec2 = &p_vec[curidx2*2];
/* reconstructed value: table magnitude times IQ, with the sign of the input */
668 e1 = copysignf(vec1[0] * IQ, in[i+0]);
669 e2 = copysignf(vec1[1] * IQ, in[i+1]);
670 e3 = copysignf(vec2[0] * IQ, in[i+2]);
671 e4 = copysignf(vec2[1] * IQ, in[i+3]);
679 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slot 11.
 *
 * Contains two loops over the band, each stepping four coefficients at a
 * time; the condition that selects between them is not among the visible
 * lines.  Both loops compute the integer levels as
 * scaled[i] * Q34 + ROUNDING (truncated by the assignment to int), limit
 * the table levels qc1..qc4 to at most 16, and build sign / count values
 * per pair of coefficients.  The second loop additionally keeps c1..c4 and
 * appends escape codewords when the table value for a coefficient is 64.0f.
 *
 * Parameters visible here: scaled, size (loop bound), scale_idx, cb,
 * lambda, uplim, bits, energy, ROUNDING.
 */
686 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *s,
688 const float *scaled,
int size,
int scale_idx,
689 int cb,
const float lambda,
const float uplim,
690 int *bits,
float *energy,
const float ROUNDING)
695 int qc1, qc2, qc3, qc4;
696 float qenergy = 0.0f;
/* ---- first loop: table lookup only ---- */
706 for (i = 0; i <
size; i += 4) {
707 int curidx, curidx2, sign1, count1, sign2, count2;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
708 int *in_int = (
int *)&in[i];
710 unsigned int v_codes;
712 const float *vec1, *vec2;
/* float expression converted to int by the assignment */
714 qc1 = scaled[i ] * Q34 + ROUNDING;
715 qc2 = scaled[i+1] * Q34 + ROUNDING;
716 qc3 = scaled[i+2] * Q34 + ROUNDING;
717 qc4 = scaled[i+3] * Q34 + ROUNDING;
721 ".set noreorder \n\t"
/* t4 = 16 (upper limit), sign1 = sign2 = 0 */
723 "ori %[t4], $zero, 16 \n\t"
724 "ori %[sign1], $zero, 0 \n\t"
725 "ori %[sign2], $zero, 0 \n\t"
/* tN = (16 < qcN); where true, qcN is replaced by 16 */
726 "slt %[t0], %[t4], %[qc1] \n\t"
727 "slt %[t1], %[t4], %[qc2] \n\t"
728 "slt %[t2], %[t4], %[qc3] \n\t"
729 "slt %[t3], %[t4], %[qc4] \n\t"
730 "movn %[qc1], %[t4], %[t0] \n\t"
731 "movn %[qc2], %[t4], %[t1] \n\t"
732 "movn %[qc3], %[t4], %[t2] \n\t"
733 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words */
734 "lw %[t0], 0(%[in_int]) \n\t"
735 "lw %[t1], 4(%[in_int]) \n\t"
736 "lw %[t2], 8(%[in_int]) \n\t"
737 "lw %[t3], 12(%[in_int]) \n\t"
/* flag = (word < 0 as a signed int); the first element of each pair
 * seeds sign1 / sign2 when its level is non-zero */
738 "slt %[t0], %[t0], $zero \n\t"
739 "movn %[sign1], %[t0], %[qc1] \n\t"
740 "slt %[t2], %[t2], $zero \n\t"
741 "movn %[sign2], %[t2], %[qc3] \n\t"
/* sign1 = (sign1 << 1) | flag1, committed only if qc2 is non-zero */
742 "slt %[t1], %[t1], $zero \n\t"
743 "sll %[t0], %[sign1], 1 \n\t"
744 "or %[t0], %[t0], %[t1] \n\t"
745 "movn %[sign1], %[t0], %[qc2] \n\t"
/* sign2 = (sign2 << 1) | flag3, committed only if qc4 is non-zero */
746 "slt %[t3], %[t3], $zero \n\t"
747 "sll %[t0], %[sign2], 1 \n\t"
748 "or %[t0], %[t0], %[t3] \n\t"
749 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
750 "slt %[count1], $zero, %[qc1] \n\t"
751 "slt %[t1], $zero, %[qc2] \n\t"
752 "slt %[count2], $zero, %[qc3] \n\t"
753 "slt %[t2], $zero, %[qc4] \n\t"
754 "addu %[count1], %[count1], %[t1] \n\t"
755 "addu %[count2], %[count2], %[t2] \n\t"
759 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
760 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
761 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
762 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
763 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
765 : [in_int]
"r"(in_int)
/* first pair: table code followed by its count1 packed sign flags */
774 v_codes = (p_codes[curidx] << count1) | sign1;
775 v_bits = p_bits[curidx] + count1;
/* second pair: table code followed by its count2 packed sign flags */
778 v_codes = (p_codes[curidx2] << count2) | sign2;
779 v_bits = p_bits[curidx2] + count2;
/* two consecutive table values belong to each index */
784 vec1 = &p_vectors[curidx*2 ];
785 vec2 = &p_vectors[curidx2*2];
/* reconstructed value: table magnitude times IQ, with the sign of the input */
786 e1 = copysignf(vec1[0] * IQ, in[i+0]);
787 e2 = copysignf(vec1[1] * IQ, in[i+1]);
788 e3 = copysignf(vec2[0] * IQ, in[i+2]);
789 e4 = copysignf(vec2[1] * IQ, in[i+3]);
797 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/* ---- second loop: table lookup plus escape codewords ---- */
801 for (i = 0; i <
size; i += 4) {
802 int curidx, curidx2, sign1, count1, sign2, count2;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
803 int *in_int = (
int *)&in[i];
805 unsigned int v_codes;
/* float expression converted to int by the assignment */
809 qc1 = scaled[i ] * Q34 + ROUNDING;
810 qc2 = scaled[i+1] * Q34 + ROUNDING;
811 qc3 = scaled[i+2] * Q34 + ROUNDING;
812 qc4 = scaled[i+3] * Q34 + ROUNDING;
816 ".set noreorder \n\t"
/* t4 = 16 (upper limit), sign1 = sign2 = 0 */
818 "ori %[t4], $zero, 16 \n\t"
819 "ori %[sign1], $zero, 0 \n\t"
820 "ori %[sign2], $zero, 0 \n\t"
/* cN = qcN shifted left by 18 with saturation, then shifted right by 18
 * (logical).  For a non-negative qcN this leaves min(qcN, 8191). */
821 "shll_s.w %[c1], %[qc1], 18 \n\t"
822 "shll_s.w %[c2], %[qc2], 18 \n\t"
823 "shll_s.w %[c3], %[qc3], 18 \n\t"
824 "shll_s.w %[c4], %[qc4], 18 \n\t"
825 "srl %[c1], %[c1], 18 \n\t"
826 "srl %[c2], %[c2], 18 \n\t"
827 "srl %[c3], %[c3], 18 \n\t"
828 "srl %[c4], %[c4], 18 \n\t"
/* tN = (16 < qcN); where true, qcN is replaced by 16 */
829 "slt %[t0], %[t4], %[qc1] \n\t"
830 "slt %[t1], %[t4], %[qc2] \n\t"
831 "slt %[t2], %[t4], %[qc3] \n\t"
832 "slt %[t3], %[t4], %[qc4] \n\t"
833 "movn %[qc1], %[t4], %[t0] \n\t"
834 "movn %[qc2], %[t4], %[t1] \n\t"
835 "movn %[qc3], %[t4], %[t2] \n\t"
836 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words */
837 "lw %[t0], 0(%[in_int]) \n\t"
838 "lw %[t1], 4(%[in_int]) \n\t"
839 "lw %[t2], 8(%[in_int]) \n\t"
840 "lw %[t3], 12(%[in_int]) \n\t"
/* flag = (word < 0 as a signed int); the first element of each pair
 * seeds sign1 / sign2 when its level is non-zero */
841 "slt %[t0], %[t0], $zero \n\t"
842 "movn %[sign1], %[t0], %[qc1] \n\t"
843 "slt %[t2], %[t2], $zero \n\t"
844 "movn %[sign2], %[t2], %[qc3] \n\t"
/* sign1 = (sign1 << 1) | flag1, committed only if qc2 is non-zero */
845 "slt %[t1], %[t1], $zero \n\t"
846 "sll %[t0], %[sign1], 1 \n\t"
847 "or %[t0], %[t0], %[t1] \n\t"
848 "movn %[sign1], %[t0], %[qc2] \n\t"
/* sign2 = (sign2 << 1) | flag3, committed only if qc4 is non-zero */
849 "slt %[t3], %[t3], $zero \n\t"
850 "sll %[t0], %[sign2], 1 \n\t"
851 "or %[t0], %[t0], %[t3] \n\t"
852 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4) */
853 "slt %[count1], $zero, %[qc1] \n\t"
854 "slt %[t1], $zero, %[qc2] \n\t"
855 "slt %[count2], $zero, %[qc3] \n\t"
856 "slt %[t2], $zero, %[qc4] \n\t"
857 "addu %[count1], %[count1], %[t1] \n\t"
858 "addu %[count2], %[count2], %[t2] \n\t"
862 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
863 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
864 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
865 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
866 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
867 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
868 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
870 : [in_int]
"r"(in_int)
/* first pair: table code followed by its count1 packed sign flags */
880 v_codes = (p_codes[curidx] << count1) | sign1;
881 v_bits = p_bits[curidx] + count1;
/* a table value of exactly 64.0f marks a coefficient that needs an escape
 * codeword: the value ((1 << (len - 3)) - 2) placed above the low `len`
 * bits of cN (len is computed on a line not visible here) */
884 if (p_vectors[curidx*2 ] == 64.0f) {
886 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 <<
len) - 1));
889 if (p_vectors[curidx*2+1] == 64.0f) {
891 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 <<
len) - 1));
/* second pair: table code followed by its count2 packed sign flags */
895 v_codes = (p_codes[curidx2] << count2) | sign2;
896 v_bits = p_bits[curidx2] + count2;
899 if (p_vectors[curidx2*2 ] == 64.0f) {
901 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 <<
len) - 1));
904 if (p_vectors[curidx2*2+1] == 64.0f) {
906 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 <<
len) - 1));
911 float e1, e2, e3, e4;
/* reconstructed value: cN * cbrt(cN) (i.e. cN to the power 4/3) times IQ,
 * with the sign of the input */
912 e1 = copysignf(c1 *
cbrtf(c1) * IQ, in[i+0]);
913 e2 = copysignf(c2 *
cbrtf(c2) * IQ, in[i+1]);
914 e3 = copysignf(c3 *
cbrtf(c3) * IQ, in[i+2]);
915 e4 = copysignf(c4 *
cbrtf(c4) * IQ, in[i+3]);
923 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slot 12.  It has the same parameter list as the other handlers; its
 * body is not among the visible lines.
 */
931 static void quantize_and_encode_band_cost_NONE_mips(
struct AACEncContext *s,
933 const float *scaled,
int size,
int scale_idx,
934 int cb,
const float lambda,
const float uplim,
935 int *bits,
float *energy,
const float ROUNDING) {
/*
 * Quantize-and-encode handler that the handler table in this file stores
 * in slots 0, 13, 14 and 15.  It loops over the band four elements at a
 * time; the loop body is not among the visible lines.
 */
939 static void quantize_and_encode_band_cost_ZERO_mips(
struct AACEncContext *s,
941 const float *scaled,
int size,
int scale_idx,
942 int cb,
const float lambda,
const float uplim,
943 int *bits,
float *energy,
const float ROUNDING) {
948 for (i = 0; i <
size; i += 4) {
/*
 * Tail of a declaration whose first lines are not visible here: the
 * remaining parameters of a function-pointer type, followed by a 16-entry
 * initializer of handler functions.
 * Presumably this is quantize_and_encode_band_cost_arr, the table that the
 * quantize_and_encode_band_cost() macro indexes with cb -- confirm.
 * Slot -> handler: 0 ZERO, 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7,
 * 9-10 UPAIR12, 11 ESC, 12 NONE, 13-15 ZERO.
 */
961 const float *scaled,
int size,
int scale_idx,
962 int cb,
const float lambda,
const float uplim,
963 int *
bits,
float *energy,
const float ROUNDING) = {
964 quantize_and_encode_band_cost_ZERO_mips,
965 quantize_and_encode_band_cost_SQUAD_mips,
966 quantize_and_encode_band_cost_SQUAD_mips,
967 quantize_and_encode_band_cost_UQUAD_mips,
968 quantize_and_encode_band_cost_UQUAD_mips,
969 quantize_and_encode_band_cost_SPAIR_mips,
970 quantize_and_encode_band_cost_SPAIR_mips,
971 quantize_and_encode_band_cost_UPAIR7_mips,
972 quantize_and_encode_band_cost_UPAIR7_mips,
973 quantize_and_encode_band_cost_UPAIR12_mips,
974 quantize_and_encode_band_cost_UPAIR12_mips,
975 quantize_and_encode_band_cost_ESC_mips,
976 quantize_and_encode_band_cost_NONE_mips,
977 quantize_and_encode_band_cost_ZERO_mips,
978 quantize_and_encode_band_cost_ZERO_mips,
979 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Dispatch helper: expands to a call through
 * quantize_and_encode_band_cost_arr[cb], forwarding all thirteen arguments
 * unchanged.  cb appears both as the table index and as an argument, so
 * the expression passed for cb is evaluated twice.
 */
982 #define quantize_and_encode_band_cost( \
983 s, pb, in, out, scaled, size, scale_idx, cb, \
984 lambda, uplim, bits, energy, ROUNDING) \
985 quantize_and_encode_band_cost_arr[cb]( \
986 s, pb, in, out, scaled, size, scale_idx, cb, \
987 lambda, uplim, bits, energy, ROUNDING)
/*
 * Parameter list of the function that follows the macro; the line carrying
 * its name is not among the visible lines.  Visible parameters: in, out,
 * size, scale_idx, cb, lambda, rtz.
 */
990 const float *in,
float *out,
int size,
int scale_idx,
991 int cb,
const float lambda,
int rtz)
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 0, 13, 14
 * and 15.  Returns float; only part of the parameter list is visible here
 * and the body is not shown.
 */
1000 static float get_band_numbits_ZERO_mips(
struct AACEncContext *s,
1002 const float *scaled,
int size,
int scale_idx,
1003 int cb,
const float lambda,
const float uplim,
/*
 * Bit-count handler that get_band_numbits_arr stores in slot 12.
 * Returns float; only part of the parameter list is visible here and the
 * body is not shown.
 */
1009 static float get_band_numbits_NONE_mips(
struct AACEncContext *s,
1011 const float *scaled,
int size,
int scale_idx,
1012 int cb,
const float lambda,
const float uplim,
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 1 and 2.
 *
 * Walks the band four coefficients at a time, reduces the four levels
 * qc1..qc4 to -1/0/+1 with the same instruction sequence as the SQUAD
 * encoder, and adds p_bits[curidx] to the running total curbits.
 */
1019 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *s,
1021 const float *scaled,
int size,
int scale_idx,
1022 int cb,
const float lambda,
const float uplim,
1027 int qc1, qc2, qc3, qc4;
1032 for (i = 0; i <
size; i += 4) {
/* integer view of in[i..i+3]; only handed to the asm as a load address */
1034 int *in_int = (
int *)&in[i];
1044 ".set noreorder \n\t"
/* qcN = (0 < qcN): every positive level becomes 1, everything else 0 */
1046 "slt %[qc1], $zero, %[qc1] \n\t"
1047 "slt %[qc2], $zero, %[qc2] \n\t"
1048 "slt %[qc3], $zero, %[qc3] \n\t"
1049 "slt %[qc4], $zero, %[qc4] \n\t"
/* fetch the four input elements as raw 32-bit words, keep bit 31 only */
1050 "lw %[t0], 0(%[in_int]) \n\t"
1051 "lw %[t1], 4(%[in_int]) \n\t"
1052 "lw %[t2], 8(%[in_int]) \n\t"
1053 "lw %[t3], 12(%[in_int]) \n\t"
1054 "srl %[t0], %[t0], 31 \n\t"
1055 "srl %[t1], %[t1], 31 \n\t"
1056 "srl %[t2], %[t2], 31 \n\t"
1057 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc1..-qc4 */
1058 "subu %[t4], $zero, %[qc1] \n\t"
1059 "subu %[t5], $zero, %[qc2] \n\t"
1060 "subu %[t6], $zero, %[qc3] \n\t"
1061 "subu %[t7], $zero, %[qc4] \n\t"
/* where bit 31 of the input was set, replace qcN by its negation */
1062 "movn %[qc1], %[t4], %[t0] \n\t"
1063 "movn %[qc2], %[t5], %[t1] \n\t"
1064 "movn %[qc3], %[t6], %[t2] \n\t"
1065 "movn %[qc4], %[t7], %[t3] \n\t"
1069 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1070 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1071 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1072 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1073 : [in_int]
"r"(in_int)
/* codeword length for this group of four */
1086 curbits += p_bits[curidx];
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 3 and 4.
 *
 * Walks the band four coefficients at a time, limits each level qc1..qc4
 * to at most 2, and adds both the table codeword length p_bits[curidx] and
 * uquad_sign_bits[curidx] to the running total curbits.
 */
1091 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *s,
1093 const float *scaled,
int size,
int scale_idx,
1094 int cb,
const float lambda,
const float uplim,
1100 int qc1, qc2, qc3, qc4;
1104 for (i = 0; i <
size; i += 4) {
1115 ".set noreorder \n\t"
/* t4 = 2 (upper limit); tN = (2 < qcN); where true, qcN becomes 2 */
1117 "ori %[t4], $zero, 2 \n\t"
1118 "slt %[t0], %[t4], %[qc1] \n\t"
1119 "slt %[t1], %[t4], %[qc2] \n\t"
1120 "slt %[t2], %[t4], %[qc3] \n\t"
1121 "slt %[t3], %[t4], %[qc4] \n\t"
1122 "movn %[qc1], %[t4], %[t0] \n\t"
1123 "movn %[qc2], %[t4], %[t1] \n\t"
1124 "movn %[qc3], %[t4], %[t2] \n\t"
1125 "movn %[qc4], %[t4], %[t3] \n\t"
1129 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1130 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1131 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* table codeword length plus the per-index extra from uquad_sign_bits */
1143 curbits += p_bits[curidx];
1144 curbits += uquad_sign_bits[curidx];
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 5 and 6.
 *
 * Walks the band four coefficients at a time, limits each level qc1..qc4
 * to at most 4, negates it where bit 31 of the matching input word is set,
 * and adds the codeword lengths of the two resulting indices to curbits.
 */
1149 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *s,
1151 const float *scaled,
int size,
int scale_idx,
1152 int cb,
const float lambda,
const float uplim,
1157 int qc1, qc2, qc3, qc4;
1162 for (i = 0; i <
size; i += 4) {
1163 int curidx, curidx2;
/* integer view of in[i..i+3]; only handed to the asm as a load address */
1164 int *in_int = (
int *)&in[i];
1174 ".set noreorder \n\t"
/* t4 = 4 (upper limit); tN = (4 < qcN); where true, qcN becomes 4 */
1176 "ori %[t4], $zero, 4 \n\t"
1177 "slt %[t0], %[t4], %[qc1] \n\t"
1178 "slt %[t1], %[t4], %[qc2] \n\t"
1179 "slt %[t2], %[t4], %[qc3] \n\t"
1180 "slt %[t3], %[t4], %[qc4] \n\t"
1181 "movn %[qc1], %[t4], %[t0] \n\t"
1182 "movn %[qc2], %[t4], %[t1] \n\t"
1183 "movn %[qc3], %[t4], %[t2] \n\t"
1184 "movn %[qc4], %[t4], %[t3] \n\t"
/* fetch the four input elements as raw 32-bit words, keep bit 31 only */
1185 "lw %[t0], 0(%[in_int]) \n\t"
1186 "lw %[t1], 4(%[in_int]) \n\t"
1187 "lw %[t2], 8(%[in_int]) \n\t"
1188 "lw %[t3], 12(%[in_int]) \n\t"
1189 "srl %[t0], %[t0], 31 \n\t"
1190 "srl %[t1], %[t1], 31 \n\t"
1191 "srl %[t2], %[t2], 31 \n\t"
1192 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc1..-qc4 (t4 is reused; the limit value is no longer needed) */
1193 "subu %[t4], $zero, %[qc1] \n\t"
1194 "subu %[t5], $zero, %[qc2] \n\t"
1195 "subu %[t6], $zero, %[qc3] \n\t"
1196 "subu %[t7], $zero, %[qc4] \n\t"
/* where bit 31 of the input was set, replace qcN by its negation */
1197 "movn %[qc1], %[t4], %[t0] \n\t"
1198 "movn %[qc2], %[t5], %[t1] \n\t"
1199 "movn %[qc3], %[t6], %[t2] \n\t"
1200 "movn %[qc4], %[t7], %[t3] \n\t"
1204 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1205 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1206 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1207 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1208 : [in_int]
"r"(in_int)
/* second index: adds qc4 plus a constant offset of 40 to the value set on
 * the preceding (not visible) line */
1216 curidx2 += qc4 + 40;
/* codeword lengths of both pairs */
1218 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 7 and 8.
 *
 * Walks the band four coefficients at a time, limits each level qc1..qc4
 * to at most 7, and adds table codeword lengths plus the matching
 * upair7_sign_bits entries for both indices to the running total curbits.
 */
1223 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *s,
1225 const float *scaled,
int size,
int scale_idx,
1226 int cb,
const float lambda,
const float uplim,
1231 int qc1, qc2, qc3, qc4;
1236 for (i = 0; i <
size; i += 4) {
1237 int curidx, curidx2;
1247 ".set noreorder \n\t"
/* t4 = 7 (upper limit); tN = (7 < qcN); where true, qcN becomes 7 */
1249 "ori %[t4], $zero, 7 \n\t"
1250 "slt %[t0], %[t4], %[qc1] \n\t"
1251 "slt %[t1], %[t4], %[qc2] \n\t"
1252 "slt %[t2], %[t4], %[qc3] \n\t"
1253 "slt %[t3], %[t4], %[qc4] \n\t"
1254 "movn %[qc1], %[t4], %[t0] \n\t"
1255 "movn %[qc2], %[t4], %[t1] \n\t"
1256 "movn %[qc3], %[t4], %[t2] \n\t"
1257 "movn %[qc4], %[t4], %[t3] \n\t"
1261 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1262 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1263 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* one summed update; one term of the sum sits on a line not visible here */
1273 curbits += p_bits[curidx] +
1274 upair7_sign_bits[curidx] +
1276 upair7_sign_bits[curidx2];
/*
 * Bit-count handler that get_band_numbits_arr stores in slots 9 and 10.
 *
 * Walks the band four coefficients at a time, limits each level qc1..qc4
 * to at most 12, and adds table codeword lengths plus the matching
 * upair12_sign_bits entries for both indices to the running total curbits.
 */
1281 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *s,
1283 const float *scaled,
int size,
int scale_idx,
1284 int cb,
const float lambda,
const float uplim,
1289 int qc1, qc2, qc3, qc4;
1294 for (i = 0; i <
size; i += 4) {
1295 int curidx, curidx2;
1305 ".set noreorder \n\t"
/* t4 = 12 (upper limit); tN = (12 < qcN); where true, qcN becomes 12 */
1307 "ori %[t4], $zero, 12 \n\t"
1308 "slt %[t0], %[t4], %[qc1] \n\t"
1309 "slt %[t1], %[t4], %[qc2] \n\t"
1310 "slt %[t2], %[t4], %[qc3] \n\t"
1311 "slt %[t3], %[t4], %[qc4] \n\t"
1312 "movn %[qc1], %[t4], %[t0] \n\t"
1313 "movn %[qc2], %[t4], %[t1] \n\t"
1314 "movn %[qc3], %[t4], %[t2] \n\t"
1315 "movn %[qc4], %[t4], %[t3] \n\t"
1319 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1320 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1321 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* one summed update; one term of the sum sits on a line not visible here */
1331 curbits += p_bits[curidx] +
1333 upair12_sign_bits[curidx] +
1334 upair12_sign_bits[curidx2];
/*
 * Bit-count handler that get_band_numbits_arr stores in slot 11.
 *
 * Walks the band four coefficients at a time.  For each level the inline
 * assembly produces (a) the table level qcN, set to 16 whenever the level
 * exceeds 15, and (b) in cN an extra bit count that is non-zero only for
 * levels above 15.  The table codeword lengths and esc_sign_bits entries
 * for both indices are then added to the running total curbits.
 */
1339 static float get_band_numbits_ESC_mips(
struct AACEncContext *s,
1341 const float *scaled,
int size,
int scale_idx,
1342 int cb,
const float lambda,
const float uplim,
1347 int qc1, qc2, qc3, qc4;
1352 for (i = 0; i <
size; i += 4) {
1353 int curidx, curidx2;
1354 int cond0, cond1, cond2, cond3;
1365 ".set noreorder \n\t"
/* t4 = 15 (comparison threshold), t5 = 16 (replacement level) */
1367 "ori %[t4], $zero, 15 \n\t"
1368 "ori %[t5], $zero, 16 \n\t"
/* cN = qcN shifted left by 18 with saturation, then shifted right by 18
 * (logical).  For a non-negative qcN this leaves min(qcN, 8191). */
1369 "shll_s.w %[c1], %[qc1], 18 \n\t"
1370 "shll_s.w %[c2], %[qc2], 18 \n\t"
1371 "shll_s.w %[c3], %[qc3], 18 \n\t"
1372 "shll_s.w %[c4], %[qc4], 18 \n\t"
1373 "srl %[c1], %[c1], 18 \n\t"
1374 "srl %[c2], %[c2], 18 \n\t"
1375 "srl %[c3], %[c3], 18 \n\t"
1376 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); where true, qcN is replaced by 16 */
1377 "slt %[cond0], %[t4], %[qc1] \n\t"
1378 "slt %[cond1], %[t4], %[qc2] \n\t"
1379 "slt %[cond2], %[t4], %[qc3] \n\t"
1380 "slt %[cond3], %[t4], %[qc4] \n\t"
1381 "movn %[qc1], %[t5], %[cond0] \n\t"
1382 "movn %[qc2], %[t5], %[cond1] \n\t"
1383 "movn %[qc3], %[t5], %[cond2] \n\t"
1384 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 31 - clz(cN): position of the highest set bit of cN */
1385 "ori %[t5], $zero, 31 \n\t"
1386 "clz %[c1], %[c1] \n\t"
1387 "clz %[c2], %[c2] \n\t"
1388 "clz %[c3], %[c3] \n\t"
1389 "clz %[c4], %[c4] \n\t"
1390 "subu %[c1], %[t5], %[c1] \n\t"
1391 "subu %[c2], %[t5], %[c2] \n\t"
1392 "subu %[c3], %[t5], %[c3] \n\t"
1393 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3 */
1394 "sll %[c1], %[c1], 1 \n\t"
1395 "sll %[c2], %[c2], 1 \n\t"
1396 "sll %[c3], %[c3], 1 \n\t"
1397 "sll %[c4], %[c4], 1 \n\t"
1398 "addiu %[c1], %[c1], -3 \n\t"
1399 "addiu %[c2], %[c2], -3 \n\t"
1400 "addiu %[c3], %[c3], -3 \n\t"
1401 "addiu %[c4], %[c4], -3 \n\t"
/* turn each 0/1 condition into an all-zeros / all-ones mask and apply it,
 * so cN stays non-zero only for levels that exceeded 15 */
1402 "subu %[cond0], $zero, %[cond0] \n\t"
1403 "subu %[cond1], $zero, %[cond1] \n\t"
1404 "subu %[cond2], $zero, %[cond2] \n\t"
1405 "subu %[cond3], $zero, %[cond3] \n\t"
1406 "and %[c1], %[c1], %[cond0] \n\t"
1407 "and %[c2], %[c2], %[cond1] \n\t"
1408 "and %[c3], %[c3], %[cond2] \n\t"
1409 "and %[c4], %[c4], %[cond3] \n\t"
1413 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1414 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1415 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
1416 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
1417 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
1418 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
1419 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5)
/* table codeword length plus esc_sign_bits entry, for each of the two indices */
1428 curbits += p_bits[curidx];
1429 curbits += esc_sign_bits[curidx];
1430 curbits += p_bits[curidx2];
1431 curbits += esc_sign_bits[curidx2];
/*
 * Table of bit-count handlers, indexed by cb through the
 * get_band_numbits() macro.  Every handler returns float.
 * Slot -> handler: 0 ZERO, 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7,
 * 9-10 UPAIR12, 11 ESC, 12 NONE, 13-15 ZERO.
 */
1441 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
1443 const float *scaled,
int size,
int scale_idx,
1444 int cb,
const float lambda,
const float uplim,
1446 get_band_numbits_ZERO_mips,
1447 get_band_numbits_SQUAD_mips,
1448 get_band_numbits_SQUAD_mips,
1449 get_band_numbits_UQUAD_mips,
1450 get_band_numbits_UQUAD_mips,
1451 get_band_numbits_SPAIR_mips,
1452 get_band_numbits_SPAIR_mips,
1453 get_band_numbits_UPAIR7_mips,
1454 get_band_numbits_UPAIR7_mips,
1455 get_band_numbits_UPAIR12_mips,
1456 get_band_numbits_UPAIR12_mips,
1457 get_band_numbits_ESC_mips,
1458 get_band_numbits_NONE_mips,
1459 get_band_numbits_ZERO_mips,
1460 get_band_numbits_ZERO_mips,
1461 get_band_numbits_ZERO_mips,
/*
 * Dispatch helper: expands to a call through get_band_numbits_arr[cb],
 * forwarding all ten arguments unchanged.  cb appears both as the table
 * index and as an argument, so the expression passed for cb is evaluated
 * twice.
 */
1464 #define get_band_numbits( \
1465 s, pb, in, scaled, size, scale_idx, cb, \
1466 lambda, uplim, bits) \
1467 get_band_numbits_arr[cb]( \
1468 s, pb, in, scaled, size, scale_idx, cb, \
1469 lambda, uplim, bits)
/*
 * Parameter list and return statement of the function that follows the
 * macro; the line carrying its name is not among the visible lines.
 * It forwards to get_band_numbits() with NULL in the pb position.  The
 * energy and rtz parameters do not appear in the visible return statement.
 */
1472 const float *scaled,
int size,
int scale_idx,
1473 int cb,
const float lambda,
const float uplim,
1474 int *bits,
float *energy,
int rtz)
1476 return get_band_numbits(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Cost handler for the ZERO case.  Walks the band four elements at a time
 * and accumulates the sum of squares of in[] into cost.  What is returned
 * and written through bits / energy is on lines not visible here.
 */
1483 static float get_band_cost_ZERO_mips(
struct AACEncContext *s,
1485 const float *scaled,
int size,
int scale_idx,
1486 int cb,
const float lambda,
const float uplim,
1487 int *bits,
float *energy)
1492 for (i = 0; i <
size; i += 4) {
/* cost += in[i]^2 + in[i+1]^2 + in[i+2]^2 + in[i+3]^2, one term at a time */
1493 cost += in[i ] * in[i ];
1494 cost += in[i+1] * in[i+1];
1495 cost += in[i+2] * in[i+2];
1496 cost += in[i+3] * in[i+3];
/*
 * Cost handler for the NONE case.  Returns float and takes the same
 * visible parameters as the other get_band_cost_* handlers; its body is
 * not among the visible lines.
 */
1505 static float get_band_cost_NONE_mips(
struct AACEncContext *s,
1507 const float *scaled,
int size,
int scale_idx,
1508 int cb,
const float lambda,
const float uplim,
1509 int *bits,
float *energy)
/*
 * Cost handler for the signed-quad case.
 *
 * Walks the band four coefficients at a time.  The first asm block reduces
 * the levels qc1..qc4 to -1/0/+1; the codeword length p_bits[curidx] is
 * added to curbits; the squared table values are added to qenergy; the
 * second asm block computes the four differences in[k] - vec[k] * IQ whose
 * squares are added to cost.
 *
 * Writes qenergy * IQ * IQ through energy and returns
 * cost * lambda + curbits.
 */
1515 static float get_band_cost_SQUAD_mips(
struct AACEncContext *s,
1517 const float *scaled,
int size,
int scale_idx,
1518 int cb,
const float lambda,
const float uplim,
1519 int *bits,
float *energy)
1525 float qenergy = 0.0f;
1526 int qc1, qc2, qc3, qc4;
1532 for (i = 0; i <
size; i += 4) {
/* two views of in[i..i+3]: integer words for the sign test, floats for
 * the distortion computation */
1535 int *in_int = (
int *)&in[i];
1536 float *in_pos = (
float *)&in[i];
1537 float di0, di1, di2, di3;
1547 ".set noreorder \n\t"
/* qcN = (0 < qcN): every positive level becomes 1, everything else 0 */
1549 "slt %[qc1], $zero, %[qc1] \n\t"
1550 "slt %[qc2], $zero, %[qc2] \n\t"
1551 "slt %[qc3], $zero, %[qc3] \n\t"
1552 "slt %[qc4], $zero, %[qc4] \n\t"
/* fetch the four input elements as raw 32-bit words, keep bit 31 only */
1553 "lw %[t0], 0(%[in_int]) \n\t"
1554 "lw %[t1], 4(%[in_int]) \n\t"
1555 "lw %[t2], 8(%[in_int]) \n\t"
1556 "lw %[t3], 12(%[in_int]) \n\t"
1557 "srl %[t0], %[t0], 31 \n\t"
1558 "srl %[t1], %[t1], 31 \n\t"
1559 "srl %[t2], %[t2], 31 \n\t"
1560 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qc1..-qc4 */
1561 "subu %[t4], $zero, %[qc1] \n\t"
1562 "subu %[t5], $zero, %[qc2] \n\t"
1563 "subu %[t6], $zero, %[qc3] \n\t"
1564 "subu %[t7], $zero, %[qc4] \n\t"
/* where bit 31 of the input was set, replace qcN by its negation */
1565 "movn %[qc1], %[t4], %[t0] \n\t"
1566 "movn %[qc2], %[t5], %[t1] \n\t"
1567 "movn %[qc3], %[t6], %[t2] \n\t"
1568 "movn %[qc4], %[t7], %[t3] \n\t"
1572 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1573 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1574 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1575 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1576 : [in_int]
"r"(in_int)
1589 curbits += p_bits[curidx];
/* in this function p_codes is the table vec points into (four values per
 * index); its declaration is not visible here */
1590 vec = &p_codes[curidx*4];
1592 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1593 + vec[2]*vec[2] + vec[3]*vec[3];
1597 ".set noreorder \n\t"
/* even FP registers receive in[i+k], odd FP registers receive vec[k] */
1599 "lwc1 $f0, 0(%[in_pos]) \n\t"
1600 "lwc1 $f1, 0(%[vec]) \n\t"
1601 "lwc1 $f2, 4(%[in_pos]) \n\t"
1602 "lwc1 $f3, 4(%[vec]) \n\t"
1603 "lwc1 $f4, 8(%[in_pos]) \n\t"
1604 "lwc1 $f5, 8(%[vec]) \n\t"
1605 "lwc1 $f6, 12(%[in_pos]) \n\t"
1606 "lwc1 $f7, 12(%[vec]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diK = in - vec * IQ */
1607 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1608 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1609 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1610 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1614 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1615 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1616 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* the FP registers used directly above are declared as clobbered */
1618 :
"$f0",
"$f1",
"$f2",
"$f3",
1619 "$f4",
"$f5",
"$f6",
"$f7",
/* squared error of the four coefficients */
1623 cost += di0 * di0 + di1 * di1
1624 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled table values, so scale by IQ squared */
1630 *energy = qenergy * (IQ*IQ);
1631 return cost * lambda + curbits;
/*
 * Cost handler for the unsigned-quad case.
 *
 * Walks the band four coefficients at a time.  The first asm block limits
 * the levels qc1..qc4 to at most 2; the codeword length p_bits[curidx]
 * and uquad_sign_bits[curidx] are added to curbits; the squared table
 * values are added to qenergy; the second asm block computes the four
 * differences |in[k]| - vec[k] * IQ whose squares are added to cost.
 *
 * Writes qenergy * IQ * IQ through energy and returns
 * cost * lambda + curbits.
 */
1634 static float get_band_cost_UQUAD_mips(
struct AACEncContext *s,
1636 const float *scaled,
int size,
int scale_idx,
1637 int cb,
const float lambda,
const float uplim,
1638 int *bits,
float *energy)
1644 float qenergy = 0.0f;
1646 int qc1, qc2, qc3, qc4;
1651 for (i = 0; i <
size; i += 4) {
1654 float *in_pos = (
float *)&in[i];
1655 float di0, di1, di2, di3;
1665 ".set noreorder \n\t"
/* t4 = 2 (upper limit); tN = (2 < qcN); where true, qcN becomes 2 */
1667 "ori %[t4], $zero, 2 \n\t"
1668 "slt %[t0], %[t4], %[qc1] \n\t"
1669 "slt %[t1], %[t4], %[qc2] \n\t"
1670 "slt %[t2], %[t4], %[qc3] \n\t"
1671 "slt %[t3], %[t4], %[qc4] \n\t"
1672 "movn %[qc1], %[t4], %[t0] \n\t"
1673 "movn %[qc2], %[t4], %[t1] \n\t"
1674 "movn %[qc3], %[t4], %[t2] \n\t"
1675 "movn %[qc4], %[t4], %[t3] \n\t"
1679 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1680 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1681 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
/* table codeword length plus the per-index extra from uquad_sign_bits */
1693 curbits += p_bits[curidx];
1694 curbits += uquad_sign_bits[curidx];
/* in this function p_codes is the table vec points into (four values per
 * index); its declaration is not visible here */
1695 vec = &p_codes[curidx*4];
1697 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1698 + vec[2]*vec[2] + vec[3]*vec[3];
1702 ".set noreorder \n\t"
/* diK = |in[i+k]| */
1704 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1705 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1706 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1707 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1708 "abs.s %[di0], %[di0] \n\t"
1709 "abs.s %[di1], %[di1] \n\t"
1710 "abs.s %[di2], %[di2] \n\t"
1711 "abs.s %[di3], %[di3] \n\t"
/* $f0..$f3 = vec[0..3] */
1712 "lwc1 $f0, 0(%[vec]) \n\t"
1713 "lwc1 $f1, 4(%[vec]) \n\t"
1714 "lwc1 $f2, 8(%[vec]) \n\t"
1715 "lwc1 $f3, 12(%[vec]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diK = |in| - vec * IQ */
1716 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1717 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1718 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1719 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1723 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1724 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1725 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
/* the FP registers used directly above are declared as clobbered */
1727 :
"$f0",
"$f1",
"$f2",
"$f3",
/* squared error of the four coefficients */
1731 cost += di0 * di0 + di1 * di1
1732 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled table values, so scale by IQ squared */
1738 *energy = qenergy * (IQ*IQ);
1739 return cost * lambda + curbits;
/*
 * get_band_cost_SPAIR_mips -- listing fragment.
 *
 * The embedded source line numbers jump, so the declarations of in, IQ,
 * cost, curbits, p_bits, p_codes and t0..t7, the code that fills qc1..qc4,
 * the first part of the curidx/curidx2 computation and the asm statement
 * openers are not present here.
 *
 * Visible behaviour: four coefficients per iteration are handled as two
 * pairs (vec for the first pair, vec2 for the second); *energy receives
 * qenergy * IQ * IQ and the function returns cost * lambda + curbits.
 * The use of s, scaled, scale_idx, cb, uplim and bits is not visible in
 * this fragment.
 */
1742 static float get_band_cost_SPAIR_mips(
struct AACEncContext *s,
1744 const float *scaled,
int size,
int scale_idx,
1745 int cb,
const float lambda,
const float uplim,
1746 int *bits,
float *energy)
1752 float qenergy = 0.0f;
1753 int qc1, qc2, qc3, qc4;
1759 for (i = 0; i <
size; i += 4) {
1760 const float *vec, *vec2;
1761 int curidx, curidx2;
/* The same four input elements are viewed both as raw words and as floats. */
1762 int *in_int = (
int *)&in[i];
1763 float *in_pos = (
float *)&in[i];
1764 float di0, di1, di2, di3;
/*
 * Integer asm, two stages:
 *  1. t4 = 4; qcN is replaced by 4 wherever 4 < qcN (upper limit of 4).
 *  2. Load the four raw input words, keep only bit 31 of each (srl 31),
 *     compute 0 - qcN into t4..t7, and replace qcN by its negation where
 *     bit 31 of the matching input word is set.
 *     NOTE(review): bit 31 is presumably the float sign bit, since the
 *     same memory is loaded as floats through in_pos -- confirm.
 */
1774 ".set noreorder \n\t"
1776 "ori %[t4], $zero, 4 \n\t"
1777 "slt %[t0], %[t4], %[qc1] \n\t"
1778 "slt %[t1], %[t4], %[qc2] \n\t"
1779 "slt %[t2], %[t4], %[qc3] \n\t"
1780 "slt %[t3], %[t4], %[qc4] \n\t"
1781 "movn %[qc1], %[t4], %[t0] \n\t"
1782 "movn %[qc2], %[t4], %[t1] \n\t"
1783 "movn %[qc3], %[t4], %[t2] \n\t"
1784 "movn %[qc4], %[t4], %[t3] \n\t"
1785 "lw %[t0], 0(%[in_int]) \n\t"
1786 "lw %[t1], 4(%[in_int]) \n\t"
1787 "lw %[t2], 8(%[in_int]) \n\t"
1788 "lw %[t3], 12(%[in_int]) \n\t"
1789 "srl %[t0], %[t0], 31 \n\t"
1790 "srl %[t1], %[t1], 31 \n\t"
1791 "srl %[t2], %[t2], 31 \n\t"
1792 "srl %[t3], %[t3], 31 \n\t"
1793 "subu %[t4], $zero, %[qc1] \n\t"
1794 "subu %[t5], $zero, %[qc2] \n\t"
1795 "subu %[t6], $zero, %[qc3] \n\t"
1796 "subu %[t7], $zero, %[qc4] \n\t"
1797 "movn %[qc1], %[t4], %[t0] \n\t"
1798 "movn %[qc2], %[t5], %[t1] \n\t"
1799 "movn %[qc3], %[t6], %[t2] \n\t"
1800 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands; t0..t7 are early-clobber scratch outputs. */
1804 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1805 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1806 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1807 [
t4]
"=&r"(
t4), [t5]
"=&r"(t5), [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
1808 : [in_int]
"r"(in_int)
/*
 * Last visible step of the index computation for the second pair; the
 * earlier steps (and the whole curidx computation) are not shown.
 */
1816 curidx2 += qc4 + 40;
/* One codeword per pair: add both codeword lengths. */
1818 curbits += p_bits[curidx];
1819 curbits += p_bits[curidx2];
/* Each index selects two consecutive codebook values (stride 2). */
1821 vec = &p_codes[curidx*2];
1822 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared magnitude of both selected codebook pairs. */
1824 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1825 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * Float asm: inputs go to $f0/$f2/$f4/$f6, codebook values to
 * $f1/$f3/$f5/$f7, then
 * nmsub.s fd, fr, fs, ft  =>  fd = fr - fs * ft   (MIPS NMSUB.fmt),
 * i.e. diN = in[i+N] - codebook_value * IQ.  No abs.s here: the signed
 * input is compared against the signed codebook value.
 */
1829 ".set noreorder \n\t"
1831 "lwc1 $f0, 0(%[in_pos]) \n\t"
1832 "lwc1 $f1, 0(%[vec]) \n\t"
1833 "lwc1 $f2, 4(%[in_pos]) \n\t"
1834 "lwc1 $f3, 4(%[vec]) \n\t"
1835 "lwc1 $f4, 8(%[in_pos]) \n\t"
1836 "lwc1 $f5, 0(%[vec2]) \n\t"
1837 "lwc1 $f6, 12(%[in_pos]) \n\t"
1838 "lwc1 $f7, 4(%[vec2]) \n\t"
1839 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1840 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1841 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1842 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1846 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1847 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1848 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1849 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Hard-coded FP scratch registers used above are declared as clobbers. */
1850 :
"$f0",
"$f1",
"$f2",
"$f3",
1851 "$f4",
"$f5",
"$f6",
"$f7",
/* Squared quantization error of the four coefficients. */
1855 cost += di0 * di0 + di1 * di1
1856 + di2 * di2 + di3 * di3;
/* Codebook energy is scaled by IQ^2 before being reported. */
1862 *energy = qenergy * (IQ*IQ);
1863 return cost * lambda + curbits;
/*
 * get_band_cost_UPAIR7_mips -- listing fragment.
 *
 * The embedded source line numbers jump, so the declarations of in, IQ,
 * cost, curbits, p_bits, p_codes and t0..t4, the code that fills qc1..qc4,
 * the curidx/curidx2 computation, the operand binding for t4 and the asm
 * statement openers are not present here.
 *
 * Visible behaviour: four coefficients per iteration are handled as two
 * unsigned pairs; the bit cost adds p_bits and upair7_sign_bits for each
 * pair; *energy receives qenergy * IQ * IQ and the function returns
 * cost * lambda + curbits.  The use of s, scaled, scale_idx, cb, uplim and
 * bits is not visible in this fragment.
 */
1866 static float get_band_cost_UPAIR7_mips(
struct AACEncContext *s,
1868 const float *scaled,
int size,
int scale_idx,
1869 int cb,
const float lambda,
const float uplim,
1870 int *bits,
float *energy)
1876 float qenergy = 0.0f;
1877 int qc1, qc2, qc3, qc4;
1883 for (i = 0; i <
size; i += 4) {
1884 const float *vec, *vec2;
1885 int curidx, curidx2, sign1, count1, sign2, count2;
/* The same four input elements are viewed both as raw words and as floats. */
1886 int *in_int = (
int *)&in[i];
1887 float *in_pos = (
float *)&in[i];
1888 float di0, di1, di2, di3;
/*
 * Integer asm:
 *  - t4 = 7, sign1 = sign2 = 0;
 *  - qcN is replaced by 7 wherever 7 < qcN (upper limit of 7);
 *  - tN = (raw input word N < 0) for the four input words;
 *  - sign1 is built from pair 1: it takes t0 if qc1 != 0, then becomes
 *    (sign1 << 1) | t1 if qc2 != 0; sign2 is built the same way from
 *    t2/qc3 and t3/qc4;
 *  - count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4).
 * The consumers of sign1/sign2/count1/count2 are not visible here.
 */
1898 ".set noreorder \n\t"
1900 "ori %[t4], $zero, 7 \n\t"
1901 "ori %[sign1], $zero, 0 \n\t"
1902 "ori %[sign2], $zero, 0 \n\t"
1903 "slt %[t0], %[t4], %[qc1] \n\t"
1904 "slt %[t1], %[t4], %[qc2] \n\t"
1905 "slt %[t2], %[t4], %[qc3] \n\t"
1906 "slt %[t3], %[t4], %[qc4] \n\t"
1907 "movn %[qc1], %[t4], %[t0] \n\t"
1908 "movn %[qc2], %[t4], %[t1] \n\t"
1909 "movn %[qc3], %[t4], %[t2] \n\t"
1910 "movn %[qc4], %[t4], %[t3] \n\t"
1911 "lw %[t0], 0(%[in_int]) \n\t"
1912 "lw %[t1], 4(%[in_int]) \n\t"
1913 "lw %[t2], 8(%[in_int]) \n\t"
1914 "lw %[t3], 12(%[in_int]) \n\t"
1915 "slt %[t0], %[t0], $zero \n\t"
1916 "movn %[sign1], %[t0], %[qc1] \n\t"
1917 "slt %[t2], %[t2], $zero \n\t"
1918 "movn %[sign2], %[t2], %[qc3] \n\t"
1919 "slt %[t1], %[t1], $zero \n\t"
1920 "sll %[t0], %[sign1], 1 \n\t"
1921 "or %[t0], %[t0], %[t1] \n\t"
1922 "movn %[sign1], %[t0], %[qc2] \n\t"
1923 "slt %[t3], %[t3], $zero \n\t"
1924 "sll %[t0], %[sign2], 1 \n\t"
1925 "or %[t0], %[t0], %[t3] \n\t"
1926 "movn %[sign2], %[t0], %[qc4] \n\t"
1927 "slt %[count1], $zero, %[qc1] \n\t"
1928 "slt %[t1], $zero, %[qc2] \n\t"
1929 "slt %[count2], $zero, %[qc3] \n\t"
1930 "slt %[t2], $zero, %[qc4] \n\t"
1931 "addu %[count1], %[count1], %[t1] \n\t"
1932 "addu %[count2], %[count2], %[t2] \n\t"
/*
 * qc1..qc4 are read-write operands; the sign/count results and t0..t3 are
 * early-clobber outputs.
 */
1936 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
1937 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
1938 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
1939 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
1940 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
1942 : [in_int]
"r"(in_int)
/*
 * First pair: codeword length plus the per-index count from
 * upair7_sign_bits; vec points at its two codebook values (stride 2).
 */
1952 curbits += p_bits[curidx];
1953 curbits += upair7_sign_bits[curidx];
1954 vec = &p_codes[curidx*2];
/* Second pair: same accounting with curidx2. */
1956 curbits += p_bits[curidx2];
1957 curbits += upair7_sign_bits[curidx2];
1958 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared magnitude of both selected codebook pairs. */
1960 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1961 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * Float asm: load four inputs, take absolute values, load the codebook
 * values (vec[0], vec[1], vec2[0], vec2[1]) into $f0..$f3, then
 * nmsub.s fd, fr, fs, ft  =>  fd = fr - fs * ft   (MIPS NMSUB.fmt),
 * i.e. diN = |in[i+N]| - codebook_value * IQ.
 */
1965 ".set noreorder \n\t"
1967 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1968 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1969 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1970 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1971 "abs.s %[di0], %[di0] \n\t"
1972 "abs.s %[di1], %[di1] \n\t"
1973 "abs.s %[di2], %[di2] \n\t"
1974 "abs.s %[di3], %[di3] \n\t"
1975 "lwc1 $f0, 0(%[vec]) \n\t"
1976 "lwc1 $f1, 4(%[vec]) \n\t"
1977 "lwc1 $f2, 0(%[vec2]) \n\t"
1978 "lwc1 $f3, 4(%[vec2]) \n\t"
1979 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1980 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1981 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1982 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1986 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
1987 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
1988 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
1989 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Hard-coded FP scratch registers used above are declared as clobbers. */
1990 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Squared quantization error of the four coefficients. */
1994 cost += di0 * di0 + di1 * di1
1995 + di2 * di2 + di3 * di3;
/* Codebook energy is scaled by IQ^2 before being reported. */
2001 *energy = qenergy * (IQ*IQ);
2002 return cost * lambda + curbits;
/*
 * get_band_cost_UPAIR12_mips -- listing fragment.
 *
 * The embedded source line numbers jump, so the declarations of in, IQ,
 * cost, curbits, p_bits, p_codes and t0..t4, the code that fills qc1..qc4,
 * the curidx/curidx2 computation, the operand binding for t4 and the asm
 * statement openers are not present here.
 *
 * Visible behaviour: same structure as the UPAIR7 variant in this file but
 * with an upper limit of 12 on the quantized values and upair12_sign_bits
 * as the sign-bit table; *energy receives qenergy * IQ * IQ and the
 * function returns cost * lambda + curbits.  The use of s, scaled,
 * scale_idx, cb, uplim and bits is not visible in this fragment.
 */
2005 static float get_band_cost_UPAIR12_mips(
struct AACEncContext *s,
2007 const float *scaled,
int size,
int scale_idx,
2008 int cb,
const float lambda,
const float uplim,
2009 int *bits,
float *energy)
2015 float qenergy = 0.0f;
2016 int qc1, qc2, qc3, qc4;
2022 for (i = 0; i <
size; i += 4) {
2023 const float *vec, *vec2;
2024 int curidx, curidx2;
2025 int sign1, count1, sign2, count2;
/* The same four input elements are viewed both as raw words and as floats. */
2026 int *in_int = (
int *)&in[i];
2027 float *in_pos = (
float *)&in[i];
2028 float di0, di1, di2, di3;
/*
 * Integer asm:
 *  - t4 = 12, sign1 = sign2 = 0;
 *  - qcN is replaced by 12 wherever 12 < qcN (upper limit of 12);
 *  - tN = (raw input word N < 0) for the four input words;
 *  - sign1 takes t0 if qc1 != 0, then becomes (sign1 << 1) | t1 if
 *    qc2 != 0; sign2 is built the same way from t2/qc3 and t3/qc4;
 *  - count1 = (0 < qc1) + (0 < qc2), count2 = (0 < qc3) + (0 < qc4).
 * The consumers of sign1/sign2/count1/count2 are not visible here.
 */
2038 ".set noreorder \n\t"
2040 "ori %[t4], $zero, 12 \n\t"
2041 "ori %[sign1], $zero, 0 \n\t"
2042 "ori %[sign2], $zero, 0 \n\t"
2043 "slt %[t0], %[t4], %[qc1] \n\t"
2044 "slt %[t1], %[t4], %[qc2] \n\t"
2045 "slt %[t2], %[t4], %[qc3] \n\t"
2046 "slt %[t3], %[t4], %[qc4] \n\t"
2047 "movn %[qc1], %[t4], %[t0] \n\t"
2048 "movn %[qc2], %[t4], %[t1] \n\t"
2049 "movn %[qc3], %[t4], %[t2] \n\t"
2050 "movn %[qc4], %[t4], %[t3] \n\t"
2051 "lw %[t0], 0(%[in_int]) \n\t"
2052 "lw %[t1], 4(%[in_int]) \n\t"
2053 "lw %[t2], 8(%[in_int]) \n\t"
2054 "lw %[t3], 12(%[in_int]) \n\t"
2055 "slt %[t0], %[t0], $zero \n\t"
2056 "movn %[sign1], %[t0], %[qc1] \n\t"
2057 "slt %[t2], %[t2], $zero \n\t"
2058 "movn %[sign2], %[t2], %[qc3] \n\t"
2059 "slt %[t1], %[t1], $zero \n\t"
2060 "sll %[t0], %[sign1], 1 \n\t"
2061 "or %[t0], %[t0], %[t1] \n\t"
2062 "movn %[sign1], %[t0], %[qc2] \n\t"
2063 "slt %[t3], %[t3], $zero \n\t"
2064 "sll %[t0], %[sign2], 1 \n\t"
2065 "or %[t0], %[t0], %[t3] \n\t"
2066 "movn %[sign2], %[t0], %[qc4] \n\t"
2067 "slt %[count1], $zero, %[qc1] \n\t"
2068 "slt %[t1], $zero, %[qc2] \n\t"
2069 "slt %[count2], $zero, %[qc3] \n\t"
2070 "slt %[t2], $zero, %[qc4] \n\t"
2071 "addu %[count1], %[count1], %[t1] \n\t"
2072 "addu %[count2], %[count2], %[t2] \n\t"
/*
 * qc1..qc4 are read-write operands; the sign/count results and t0..t3 are
 * early-clobber outputs.
 */
2076 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2077 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2078 [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
2079 [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
2080 [
t0]
"=&r"(
t0), [t1]
"=&r"(t1), [
t2]
"=&r"(
t2), [t3]
"=&r"(t3),
2082 : [in_int]
"r"(in_int)
/*
 * Bit cost for both pairs: codeword lengths from p_bits plus the
 * per-index counts from upair12_sign_bits.
 */
2092 curbits += p_bits[curidx];
2093 curbits += p_bits[curidx2];
2094 curbits += upair12_sign_bits[curidx];
2095 curbits += upair12_sign_bits[curidx2];
/* Each index selects two consecutive codebook values (stride 2). */
2096 vec = &p_codes[curidx*2];
2097 vec2 = &p_codes[curidx2*2];
/* Accumulate the squared magnitude of both selected codebook pairs. */
2099 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2100 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
/*
 * Float asm: load four inputs, take absolute values, load the codebook
 * values (vec[0], vec[1], vec2[0], vec2[1]) into $f0..$f3, then
 * nmsub.s fd, fr, fs, ft  =>  fd = fr - fs * ft   (MIPS NMSUB.fmt),
 * i.e. diN = |in[i+N]| - codebook_value * IQ.
 */
2104 ".set noreorder \n\t"
2106 "lwc1 %[di0], 0(%[in_pos]) \n\t"
2107 "lwc1 %[di1], 4(%[in_pos]) \n\t"
2108 "lwc1 %[di2], 8(%[in_pos]) \n\t"
2109 "lwc1 %[di3], 12(%[in_pos]) \n\t"
2110 "abs.s %[di0], %[di0] \n\t"
2111 "abs.s %[di1], %[di1] \n\t"
2112 "abs.s %[di2], %[di2] \n\t"
2113 "abs.s %[di3], %[di3] \n\t"
2114 "lwc1 $f0, 0(%[vec]) \n\t"
2115 "lwc1 $f1, 4(%[vec]) \n\t"
2116 "lwc1 $f2, 0(%[vec2]) \n\t"
2117 "lwc1 $f3, 4(%[vec2]) \n\t"
2118 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2119 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2120 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2121 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2125 : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
2126 [di2]
"=&f"(di2), [di3]
"=&f"(di3)
2127 : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
2128 [vec2]
"r"(vec2), [IQ]
"f"(IQ)
/* Hard-coded FP scratch registers used above are declared as clobbers. */
2129 :
"$f0",
"$f1",
"$f2",
"$f3",
/* Squared quantization error of the four coefficients. */
2133 cost += di0 * di0 + di1 * di1
2134 + di2 * di2 + di3 * di3;
/* Codebook energy is scaled by IQ^2 before being reported. */
2140 *energy = qenergy * (IQ*IQ);
2141 return cost * lambda + curbits;
/*
 * get_band_cost_ESC_mips -- listing fragment.
 *
 * The embedded source line numbers jump, so the declarations of in, IQ,
 * cost, curbits, p_bits, p_codes, c1..c4, t1..t7 and V, the code that
 * fills qc1..qc4, the curidx/curidx2 computation, the assignment of t1,
 * the conditions of the second and third alternatives in each di
 * computation, and any store to *energy are not present here.
 *
 * Visible behaviour: four coefficients per iteration are handled as two
 * pairs; bit cost adds p_bits, esc_sign_bits and, for values above 15, an
 * extra term derived from av_log2(); the function returns
 * cost * lambda + curbits.  The use of s, scaled, scale_idx, cb, uplim and
 * bits is not visible in this fragment.
 */
2144 static float get_band_cost_ESC_mips(
struct AACEncContext *s,
2146 const float *scaled,
int size,
int scale_idx,
2147 int cb,
const float lambda,
const float uplim,
2148 int *bits,
float *energy)
/* Magnitude threshold used by the ">= CLIPPED_ESCAPE" tests below. */
2152 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2155 float qenergy = 0.0f;
2156 int qc1, qc2, qc3, qc4;
2162 for (i = 0; i <
size; i += 4) {
2163 const float *vec, *vec2;
2164 int curidx, curidx2;
2166 float di1, di2, di3, di4;
2167 int cond0, cond1, cond2, cond3;
/*
 * Integer asm:
 *  - t6 = 15, t7 = 16;
 *  - cN = (qcN saturating-shifted left by 18) logically shifted right by
 *    18.  NOTE(review): this presumably clamps a non-negative qcN to
 *    8191 (13 bits) -- confirm against the MIPS DSP ASE definition of
 *    shll_s.w;
 *  - condN = (15 < qcN); where condN is set, qcN is replaced by 16.
 */
2178 ".set noreorder \n\t"
2180 "ori %[t6], $zero, 15 \n\t"
2181 "ori %[t7], $zero, 16 \n\t"
2182 "shll_s.w %[c1], %[qc1], 18 \n\t"
2183 "shll_s.w %[c2], %[qc2], 18 \n\t"
2184 "shll_s.w %[c3], %[qc3], 18 \n\t"
2185 "shll_s.w %[c4], %[qc4], 18 \n\t"
2186 "srl %[c1], %[c1], 18 \n\t"
2187 "srl %[c2], %[c2], 18 \n\t"
2188 "srl %[c3], %[c3], 18 \n\t"
2189 "srl %[c4], %[c4], 18 \n\t"
2190 "slt %[cond0], %[t6], %[qc1] \n\t"
2191 "slt %[cond1], %[t6], %[qc2] \n\t"
2192 "slt %[cond2], %[t6], %[qc3] \n\t"
2193 "slt %[cond3], %[t6], %[qc4] \n\t"
2194 "movn %[qc1], %[t7], %[cond0] \n\t"
2195 "movn %[qc2], %[t7], %[cond1] \n\t"
2196 "movn %[qc3], %[t7], %[cond2] \n\t"
2197 "movn %[qc4], %[t7], %[cond3] \n\t"
/*
 * qc1..qc4 are read-write operands; cond0..cond3, c1..c4, t6 and t7 are
 * early-clobber outputs.
 */
2201 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
2202 [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
2203 [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
2204 [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
2205 [
c1]
"=&r"(
c1), [c2]
"=&r"(c2),
2206 [c3]
"=&r"(c3), [c4]
"=&r"(c4),
2207 [
t6]
"=&r"(
t6), [t7]
"=&r"(t7)
/*
 * First pair: codeword length plus the per-index count from
 * esc_sign_bits; vec points at its two codebook values (stride 2).
 */
2216 curbits += p_bits[curidx];
2217 curbits += esc_sign_bits[curidx];
2218 vec = &p_codes[curidx*2];
/* Second pair: same accounting with curidx2. */
2220 curbits += p_bits[curidx2];
2221 curbits += esc_sign_bits[curidx2];
2222 vec2 = &p_codes[curidx2*2];
/*
 * Extra bits for values above 15: condN is 0 or 1, so -condN is either 0
 * or an all-ones mask, and the av_log2()-based term is added only when
 * condN is set.
 */
2224 curbits += (
av_log2(c1) * 2 - 3) & (-cond0);
2225 curbits += (
av_log2(c2) * 2 - 3) & (-cond1);
2226 curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
2227 curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
/* Input magnitudes (the matching assignment of t1 is not in this listing). */
2230 t2 = fabsf(in[i+1]);
2231 t3 = fabsf(in[i+2]);
2232 t4 = fabsf(in[i+3]);
/*
 * Per-coefficient error.  Three alternatives are visible for each
 * coefficient; only the first condition appears in this listing:
 *   - magnitude >= CLIPPED_ESCAPE: error is measured against
 *     CLIPPED_ESCAPE and CLIPPED_ESCAPE^2 is added to qenergy;
 *   - otherwise either against cN * cbrtf(cN) * IQ,
 *   - or against the codebook value times IQ.
 * V holds the reconstructed value; its later use is not visible here.
 */
2235 if (t1 >= CLIPPED_ESCAPE) {
2236 di1 = t1 - CLIPPED_ESCAPE;
2237 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2239 di1 = t1 - (V = c1 *
cbrtf(c1) * IQ);
2243 di1 = t1 - (V = vec[0] * IQ);
2248 if (t2 >= CLIPPED_ESCAPE) {
2249 di2 = t2 - CLIPPED_ESCAPE;
2250 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2252 di2 = t2 - (V = c2 *
cbrtf(c2) * IQ);
2256 di2 = t2 - (V = vec[1] * IQ);
2261 if (t3 >= CLIPPED_ESCAPE) {
2262 di3 = t3 - CLIPPED_ESCAPE;
2263 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2265 di3 = t3 - (V = c3 *
cbrtf(c3) * IQ);
2269 di3 = t3 - (V = vec2[0] * IQ);
2274 if (t4 >= CLIPPED_ESCAPE) {
2275 di4 = t4 - CLIPPED_ESCAPE;
2276 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2278 di4 = t4 - (V = c4 *
cbrtf(c4) * IQ);
2282 di4 = t4 - (V = vec2[1]*IQ);
/* Squared quantization error of the four coefficients. */
2286 cost += di1 * di1 + di2 * di2
2287 + di3 * di3 + di4 * di4;
2292 return cost * lambda + curbits;
/*
 * Dispatch table of band-cost routines, indexed by codebook number (the
 * get_band_cost macro in this file indexes it with cb).
 *
 * Entries as listed: 0 -> ZERO, 1-2 -> SQUAD, 3-4 -> UQUAD, 5-6 -> SPAIR,
 * 7-8 -> UPAIR7, 9-10 -> UPAIR12, 11 -> ESC, 12 -> NONE, 13-15 -> ZERO.
 *
 * The embedded source line numbers skip 2296 and 2316, so part of the
 * prototype and the end of the initializer are not present in this
 * listing.
 */
2295 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
2297 const float *scaled,
int size,
int scale_idx,
2298 int cb,
const float lambda,
const float uplim,
2299 int *
bits,
float *energy) = {
2300 get_band_cost_ZERO_mips,
2301 get_band_cost_SQUAD_mips,
2302 get_band_cost_SQUAD_mips,
2303 get_band_cost_UQUAD_mips,
2304 get_band_cost_UQUAD_mips,
2305 get_band_cost_SPAIR_mips,
2306 get_band_cost_SPAIR_mips,
2307 get_band_cost_UPAIR7_mips,
2308 get_band_cost_UPAIR7_mips,
2309 get_band_cost_UPAIR12_mips,
2310 get_band_cost_UPAIR12_mips,
2311 get_band_cost_ESC_mips,
2312 get_band_cost_NONE_mips,
2313 get_band_cost_ZERO_mips,
2314 get_band_cost_ZERO_mips,
2315 get_band_cost_ZERO_mips,
/*
 * Call the band-cost routine selected by cb from get_band_cost_arr,
 * forwarding every argument unchanged.  Note that cb appears twice in the
 * expansion (as table index and as argument), so it is evaluated twice.
 */
2318 #define get_band_cost( \
2319 s, pb, in, scaled, size, scale_idx, cb, \
2320 lambda, uplim, bits, energy) \
2321 get_band_cost_arr[cb]( \
2322 s, pb, in, scaled, size, scale_idx, cb, \
2323 lambda, uplim, bits, energy)
2326 const float *scaled,
int size,
int scale_idx,
2327 int cb,
const float lambda,
const float uplim,
2328 int *bits,
float *energy,
int rtz)
2330 return get_band_cost(s,
NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
2339 int start = 0, i, w, w2,
g, sid_sf_boost, prev_mid, prev_side;
2340 uint8_t nextband0[128], nextband1[128];
2341 float M[128],
S[128];
2343 const float lambda = s->
lambda;
2344 const float mslambda =
FFMIN(1.0f, lambda / 120.f);
2354 prev_mid = sce0->
sf_idx[0];
2355 prev_side = sce1->
sf_idx[0];
2363 float Mmax = 0.0f, Smax = 0.0f;
2368 M[i] = (sce0->
coeffs[start+(w+w2)*128+i]
2369 + sce1->
coeffs[start+(w+w2)*128+i]) * 0.5;
2371 - sce1->
coeffs[start+(w+w2)*128+i];
2376 Mmax =
FFMAX(Mmax, M34[i]);
2377 Smax =
FFMAX(Smax, S34[i]);
2381 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2382 float dist1 = 0.0f, dist2 = 0.0f;
2402 midcb =
FFMAX(1,midcb);
2403 sidcb =
FFMAX(1,sidcb);
2411 M[i] = (sce0->
coeffs[start+(w+w2)*128+i]
2412 + sce1->
coeffs[start+(w+w2)*128+i]) * 0.5;
2414 - sce1->
coeffs[start+(w+w2)*128+i];
2453 sce0->
sf_idx[w*16+
g] = mididx;
2454 sce1->
sf_idx[w*16+
g] = sididx;
2462 }
else if (
B1 > B0) {
2469 prev_mid = sce0->
sf_idx[w*16+
g];
2471 prev_side = sce1->
sf_idx[w*16+
g];
Band types following are encoded differently from others.
AAC encoder trellis codebook selector.
static void abs_pow34_v(float *out, const float *in, const int size)
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
Write the bit val n times.
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
#define SCALE_MAX_POS
scalefactor index maximum value
AACCoefficientsEncoder * coder
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
int prev_idx
pointer to the previous path point
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Spectral data are scaled white noise not coded in the bitstream.
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, energy, rtz)
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
static double cb(void *priv, double x, double y)
AACEncOptions options
encoding options
SingleChannelElement ch[2]
void ff_aac_coder_init_mips(AACEncContext *c)
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
single band psychoacoustic information
int num_swb
number of scalefactor window bands
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
const float *const ff_aac_codebook_vectors[]
float ff_aac_pow2sf_tab[428]
#define SCALE_DIV_512
scalefactor difference that corresponds to a scale difference of 512 times
float ff_aac_pow34sf_tab[428]
int cur_channel
current channel for coder context
const uint8_t *const ff_aac_spectral_bits[11]
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
AAC definitions and structures.
AAC encoder twoloop coder.
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
Libavcodec external API header.
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
static int find_min_book(float maxval, int sf)
IndividualChannelStream ics
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
static av_always_inline float cbrtf(float x)
structure used in optimal codebook search
Replacements for frequently missing libm functions.
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
uint8_t zeroes[128]
band is not coded (used by encoder)
int sf_idx[128]
scalefactor indices (used by encoder)
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
Single Channel Element - used for both SCE and LFE elements.
ChannelElement * cpe
channel elements
channel element - generic struct for SCE/CPE/CCE/LFE
const uint16_t *const ff_aac_spectral_codes[11]
FFPsyChannel * ch
single channel information
enum BandType band_type[128]
band types
AAC encoder quantization misc reusable function templates.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
float scoefs[1024]
scaled coefficients