FFmpeg
opusenc_psy.c
Go to the documentation of this file.
1 /*
2  * Opus encoder
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "opusenc_psy.h"
23 #include "opus_pvq.h"
24 #include "opustab.h"
25 #include "mdct15.h"
26 #include "libavutil/qsort.h"
27 
/*
 * Trial-quantise a single band and return a rate/distortion style score.
 *
 * pvq    - quantiser context; its quant_band callback does the actual work
 * f      - frame being analysed; f->remaining2 is overwritten here
 * rc     - range coder handed to quant_band and queried for the bit cost
 * band   - index of the band to evaluate
 * bits   - accumulator; the cost computed for this band is added to *bits
 * lambda - scale factor applied to the returned score
 *
 * Returns lambda * dist * cost, where dist is the sum of the per-channel
 * Euclidean errors between the quantised copies and the original
 * coefficients, and cost is OPUS_RC_CHECKPOINT_BITS(rc) / 8.
 *
 * NOTE(review): this listing's own line numbering skips 40 and 73, and
 * OPUS_RC_CHECKPOINT_BITS is used below although no checkpoint set-up or
 * restore is visible here - statements may have been lost from the listing;
 * confirm against the original file.
 */
28 static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band,
29  float *bits, float lambda)
30 {
31  int i, b = 0;
/* Both mask entries start with the low f->blocks bits set. */
32  uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
33  const int band_size = ff_celt_freq_range[band] << f->size;
/* buf is split into two 176-float halves: X uses the first, Y the second. */
34  float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
35  float dist, cost, err_x = 0.0f, err_y = 0.0f;
36  float *X = buf;
37  float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size);
/* Y is only set up for two-channel frames; it stays NULL otherwise. */
38  float *Y = (f->channels == 2) ? &buf[176] : NULL;
39  float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size);
41 
/* Quantisation is run on copies; the originals are kept for the error sums. */
42  memcpy(X, X_orig, band_size*sizeof(float));
43  if (Y)
44  memcpy(Y, Y_orig, band_size*sizeof(float));
45 
46  f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
/* b keeps its initial value of 0 for bands at or beyond f->coded_bands. */
47  if (band <= f->coded_bands - 1) {
48  int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
49  b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
50  }
51 
/*
 * Dual stereo: each channel is quantised on its own with half of b.
 * NOTE(review): Y is NULL here unless f->channels == 2; presumably
 * dual_stereo is only ever set for stereo frames - confirm with callers.
 */
52  if (f->dual_stereo) {
53  pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL,
54  f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]);
55 
56  pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL,
57  f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]);
58  } else {
/* Otherwise one call receives both channels, all of b and the merged mask. */
59  pvq->quant_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size,
60  norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
61  }
62 
/* Sum of squared differences per channel; err_y stays 0 when Y is NULL. */
63  for (i = 0; i < band_size; i++) {
64  err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]);
65  if (Y)
66  err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]);
67  }
68 
69  dist = sqrtf(err_x) + sqrtf(err_y);
70  cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
71  *bits += cost;
72 
74 
75  return lambda*dist*cost;
76 }
77 
78 /* Populate metrics without taking into consideration neighbouring steps */
80 {
81  int silence = 0, ch, i, j;
82  OpusPsyStep *st = s->steps[index];
83 
84  st->index = index;
85 
86  for (ch = 0; ch < s->avctx->channels; ch++) {
87  const int lap_size = (1 << s->bsize_analysis);
88  for (i = 1; i <= FFMIN(lap_size, index); i++) {
89  const int offset = i*120;
90  AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i);
91  memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
92  }
93  for (i = 0; i < lap_size; i++) {
94  const int offset = i*120 + lap_size;
95  AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i);
96  memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
97  }
98 
100  (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1));
101 
102  s->mdct[s->bsize_analysis]->mdct(s->mdct[s->bsize_analysis], st->coeffs[ch], s->scratch, 1);
103 
104  for (i = 0; i < CELT_MAX_BANDS; i++)
105  st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis];
106  }
107 
108  for (ch = 0; ch < s->avctx->channels; ch++) {
109  for (i = 0; i < CELT_MAX_BANDS; i++) {
110  float avg_c_s, energy = 0.0f, dist_dev = 0.0f;
111  const int range = ff_celt_freq_range[i] << s->bsize_analysis;
112  const float *coeffs = st->bands[ch][i];
113  for (j = 0; j < range; j++)
114  energy += coeffs[j]*coeffs[j];
115 
116  st->energy[ch][i] += sqrtf(energy);
117  silence |= !!st->energy[ch][i];
118  avg_c_s = energy / range;
119 
120  for (j = 0; j < range; j++) {
121  const float c_s = coeffs[j]*coeffs[j];
122  dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s);
123  }
124 
125  st->tone[ch][i] += sqrtf(dist_dev);
126  }
127  }
128 
129  st->silence = !silence;
130 
131  if (s->avctx->channels > 1) {
132  for (i = 0; i < CELT_MAX_BANDS; i++) {
133  float incompat = 0.0f;
134  const float *coeffs1 = st->bands[0][i];
135  const float *coeffs2 = st->bands[1][i];
136  const int range = ff_celt_freq_range[i] << s->bsize_analysis;
137  for (j = 0; j < range; j++)
138  incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]);
139  st->stereo[i] = sqrtf(incompat);
140  }
141  }
142 
143  for (ch = 0; ch < s->avctx->channels; ch++) {
144  for (i = 0; i < CELT_MAX_BANDS; i++) {
145  OpusBandExcitation *ex = &s->ex[ch][i];
146  float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]);
147  bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e);
148  bp_e *= bp_e;
149  if (bp_e > ex->excitation) {
150  st->change_amp[ch][i] = bp_e - ex->excitation;
151  st->total_change += st->change_amp[ch][i];
152  ex->excitation = ex->excitation_init = bp_e;
153  ex->excitation_dist = 0.0f;
154  }
155  if (ex->excitation > 0.0f) {
156  ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09);
157  ex->excitation = FFMAX(ex->excitation, 0.0f);
158  ex->excitation_dist += 1.0f;
159  }
160  }
161  }
162 }
163 
/*
 * Recursively bisect the step range [offset_s, offset_e) at the point where
 * the running sum of steps[i]->total_change first exceeds tgt_change.
 *
 * s          - psy context whose steps[] array is scanned
 * tgt_change - threshold for the running sum; halved on each recursion
 * offset_s   - first step index to examine (inclusive)
 * offset_e   - end of the range (exclusive)
 * resolution - ranges of this many steps or fewer are not searched
 * level      - recursion depth; only passed on (incremented) in the code shown
 *
 * NOTE(review): as shown, nothing is stored for the split point i. This
 * listing's own line numbering skips 180, between the two recursive calls,
 * so a statement may have been lost - confirm against the original file.
 */
164 static void search_for_change_points(OpusPsyContext *s, float tgt_change,
165  int offset_s, int offset_e, int resolution,
166  int level)
167 {
168  int i;
169  float c_change = 0.0f;
/* Stop once the range is no longer than the requested resolution. */
170  if ((offset_e - offset_s) <= resolution)
171  return;
/* Find the first step at which the accumulated change passes the target. */
172  for (i = offset_s; i < offset_e; i++) {
173  c_change += s->steps[i]->total_change;
174  if (c_change > tgt_change)
175  break;
176  }
/* The loop ran to completion: the target was never exceeded in this range. */
177  if (i == offset_e)
178  return;
/* Search each side of step i (i itself excluded) with half the target. */
179  search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1);
181  search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1);
182 }
183 
185 {
186  int fsize, silent_frames;
187 
188  for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++)
189  if (!s->steps[silent_frames]->silence)
190  break;
191  if (--silent_frames < 0)
192  return 0;
193 
194  for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) {
195  if ((1 << fsize) > silent_frames)
196  continue;
197  s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize);
198  s->p.framesize = fsize;
199  return 1;
200  }
201 
202  return 0;
203 }
204 
205 /* Main function which decides frame size and frames per current packet */
207 {
208  int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000;
209  int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
210 
211  /* These don't change for now */
212  s->p.mode = OPUS_MODE_CELT;
214 
215  /* Flush silent frames ASAP */
216  if (s->steps[0]->silence && flush_silent_frames(s))
217  return;
218 
219  s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960);
220  s->p.frames = 1;
221 }
222 
224 {
225  int i;
226  float total_energy_change = 0.0f;
227 
228  if (s->buffered_steps < s->max_steps && !s->eof) {
229  const int awin = (1 << s->bsize_analysis);
230  if (++s->steps_to_process >= awin) {
231  step_collect_psy_metrics(s, s->buffered_steps - awin + 1);
232  s->steps_to_process = 0;
233  }
234  if ((++s->buffered_steps) < s->max_steps)
235  return 1;
236  }
237 
238  for (i = 0; i < s->buffered_steps; i++)
239  total_energy_change += s->steps[i]->total_change;
240 
241  search_for_change_points(s, total_energy_change / 2.0f, 0,
242  s->buffered_steps, 1, 0);
243 
245 
246  p->frames = s->p.frames;
247  p->framesize = s->p.framesize;
248  p->mode = s->p.mode;
249  p->bandwidth = s->p.bandwidth;
250 
251  return 0;
252 }
253 
255 {
256  int i, neighbouring_points = 0, start_offset = 0;
257  int radius = (1 << s->p.framesize), step_offset = radius*index;
258  int silence = 1;
259 
260  f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0;
262  f->channels = s->avctx->channels;
263  f->size = s->p.framesize;
264 
265  for (i = 0; i < (1 << f->size); i++)
266  silence &= s->steps[index*(1 << f->size) + i]->silence;
267 
268  f->silence = silence;
269  if (f->silence) {
270  f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */
271  return;
272  }
273 
274  for (i = 0; i < s->inflection_points_count; i++) {
275  if (s->inflection_points[i] >= step_offset) {
276  start_offset = i;
277  break;
278  }
279  }
280 
281  for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) {
282  if (s->inflection_points[i] < (step_offset + radius)) {
283  neighbouring_points++;
284  }
285  }
286 
287  /* Transient flagging */
288  f->transient = neighbouring_points > 0;
290 
291  /* Some sane defaults */
292  f->pfilter = 0;
293  f->pf_gain = 0.5f;
294  f->pf_octave = 2;
295  f->pf_period = 1;
296  f->pf_tapset = 2;
297 
298  /* More sane defaults */
299  f->tf_select = 0;
300  f->anticollapse = 1;
301  f->alloc_trim = 5;
302  f->skip_band_floor = f->end_band;
303  f->intensity_stereo = f->end_band;
304  f->dual_stereo = 0;
306  memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
307  memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
308 }
309 
311  CeltFrame *f_out)
312 {
313  int i, f, ch;
315  float rate, frame_bits = 0;
316 
317  /* Used for the global ROTATE flag */
318  float tonal = 0.0f;
319 
320  /* Pseudo-weights */
321  float band_score[CELT_MAX_BANDS] = { 0 };
322  float max_score = 1.0f;
323 
324  /* Pass one - one loop around each band, computing unquant stuff */
325  for (i = 0; i < CELT_MAX_BANDS; i++) {
326  float weight = 0.0f;
327  float tonal_contrib = 0.0f;
328  for (f = 0; f < (1 << s->p.framesize); f++) {
329  weight = start[f]->stereo[i];
330  for (ch = 0; ch < s->avctx->channels; ch++) {
331  weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i];
332  tonal_contrib += start[f]->tone[ch][i];
333  }
334  }
335  tonal += tonal_contrib;
336  band_score[i] = weight;
337  }
338 
339  tonal /= (float)CELT_MAX_BANDS;
340 
341  for (i = 0; i < CELT_MAX_BANDS; i++) {
342  if (band_score[i] > max_score)
343  max_score = band_score[i];
344  }
345 
346  for (i = 0; i < CELT_MAX_BANDS; i++) {
347  f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f);
348  frame_bits += band_score[i]*8.0f;
349  }
350 
351  tonal /= 1333136.0f;
352  f_out->spread = av_clip_uintp2(lrintf(tonal), 2);
353 
354  rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16;
355  rate *= s->lambda;
356  rate /= s->avctx->sample_rate/frame_size;
357 
358  f_out->framebits = lrintf(rate);
359  f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_PACKET_SIZE*8);
360  f_out->framebits = FFALIGN(f_out->framebits, 8);
361 }
362 
363 static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
364 {
365  int i, tdist = 0.0f;
366  OpusRangeCoder dump;
367 
368  ff_opus_rc_enc_init(&dump);
369  ff_celt_bitalloc(f, &dump, 1);
370 
371  for (i = 0; i < CELT_MAX_BANDS; i++) {
372  float bits = 0.0f;
373  float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
374  tdist += dist;
375  }
376 
377  *total_dist = tdist;
378 
379  return 0;
380 }
381 
383 {
384  float td1, td2;
385  f->dual_stereo = 0;
386 
387  if (s->avctx->channels < 2)
388  return;
389 
390  bands_dist(s, f, &td1);
391  f->dual_stereo = 1;
392  bands_dist(s, f, &td2);
393 
394  f->dual_stereo = td2 < td1;
395  s->dual_stereo_used += td2 < td1;
396 }
397 
399 {
400  int i, best_band = CELT_MAX_BANDS - 1;
401  float dist, best_dist = FLT_MAX;
402  /* TODO: fix, make some heuristic up here using the lambda value */
403  float end_band = 0;
404 
405  if (s->avctx->channels < 2)
406  return;
407 
408  for (i = f->end_band; i >= end_band; i--) {
409  f->intensity_stereo = i;
410  bands_dist(s, f, &dist);
411  if (best_dist > dist) {
412  best_dist = dist;
413  best_band = i;
414  }
415  }
416 
417  f->intensity_stereo = best_band;
418  s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f;
419 }
420 
422 {
423  int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } };
424  float score[2] = { 0 };
425 
426  for (cway = 0; cway < 2; cway++) {
427  int mag[2];
428  int base = f->transient ? 120 : 960;
429 
430  for (i = 0; i < 2; i++) {
431  int c = ff_celt_tf_select[f->size][f->transient][cway][i];
432  mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c);
433  }
434 
435  for (i = 0; i < CELT_MAX_BANDS; i++) {
436  float iscore0 = 0.0f;
437  float iscore1 = 0.0f;
438  for (j = 0; j < (1 << f->size); j++) {
439  for (k = 0; k < s->avctx->channels; k++) {
440  iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0];
441  iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1];
442  }
443  }
444  config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f);
445  score[cway] += config[cway][i] ? iscore1 : iscore0;
446  }
447  }
448 
449  f->tf_select = score[0] < score[1];
450  memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS);
451 
452  return 0;
453 }
454 
456 {
457  int start_transient_flag = f->transient;
458  OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)];
459 
460  if (f->silence)
461  return 0;
462 
463  celt_gauge_psy_weight(s, start, f);
466  celt_search_for_tf(s, start, f);
467 
468  if (f->transient != start_transient_flag) {
470  s->redo_analysis = 1;
471  return 1;
472  }
473 
474  s->redo_analysis = 0;
475 
476  return 0;
477 }
478 
480 {
482  int steps_out = s->p.frames*(frame_size/120);
483  void *tmp[FF_BUFQUEUE_SIZE];
484  float ideal_fbits;
485 
486  for (i = 0; i < steps_out; i++)
487  memset(s->steps[i], 0, sizeof(OpusPsyStep));
488 
489  for (i = 0; i < s->max_steps; i++)
490  tmp[i] = s->steps[i];
491 
492  for (i = 0; i < s->max_steps; i++) {
493  const int i_new = i - steps_out;
494  s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i];
495  }
496 
497  for (i = steps_out; i < s->buffered_steps; i++)
498  s->steps[i]->index -= steps_out;
499 
500  ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size);
501 
502  for (i = 0; i < s->p.frames; i++) {
503  s->avg_is_band += f[i].intensity_stereo;
504  s->lambda *= ideal_fbits / f[i].framebits;
505  }
506 
507  s->avg_is_band /= (s->p.frames + 1);
508 
509  s->cs_num = 0;
510  s->steps_to_process = 0;
511  s->buffered_steps -= steps_out;
512  s->total_packets_out += s->p.frames;
514 }
515 
517  struct FFBufQueue *bufqueue, OpusEncOptions *options)
518 {
519  int i, ch, ret;
520 
521  s->redo_analysis = 0;
522  s->lambda = 1.0f;
523  s->options = options;
524  s->avctx = avctx;
525  s->bufqueue = bufqueue;
526  s->max_steps = ceilf(s->options->max_delay_ms/2.5f);
528  s->avg_is_band = CELT_MAX_BANDS - 1;
530 
532  if (!s->inflection_points) {
533  ret = AVERROR(ENOMEM);
534  goto fail;
535  }
536 
538  if (!s->dsp) {
539  ret = AVERROR(ENOMEM);
540  goto fail;
541  }
542 
543  for (ch = 0; ch < s->avctx->channels; ch++) {
544  for (i = 0; i < CELT_MAX_BANDS; i++) {
545  bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1);
546  bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0);
547  }
548  }
549 
550  for (i = 0; i < s->max_steps; i++) {
551  s->steps[i] = av_mallocz(sizeof(OpusPsyStep));
552  if (!s->steps[i]) {
553  ret = AVERROR(ENOMEM);
554  goto fail;
555  }
556  }
557 
558  for (i = 0; i < CELT_BLOCK_NB; i++) {
559  float tmp;
560  const int len = OPUS_BLOCK_SIZE(i);
561  s->window[i] = av_malloc(2*len*sizeof(float));
562  if (!s->window[i]) {
563  ret = AVERROR(ENOMEM);
564  goto fail;
565  }
566  generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp);
567  if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
568  goto fail;
569  }
570 
571  return 0;
572 
573 fail:
575  av_freep(&s->dsp);
576 
577  for (i = 0; i < CELT_BLOCK_NB; i++) {
578  ff_mdct15_uninit(&s->mdct[i]);
579  av_freep(&s->window[i]);
580  }
581 
582  for (i = 0; i < s->max_steps; i++)
583  av_freep(&s->steps[i]);
584 
585  return ret;
586 }
587 
589 {
590  s->eof = 1;
591 }
592 
594 {
595  int i;
596 
598  av_freep(&s->dsp);
599 
600  for (i = 0; i < CELT_BLOCK_NB; i++) {
601  ff_mdct15_uninit(&s->mdct[i]);
602  av_freep(&s->window[i]);
603  }
604 
605  for (i = 0; i < s->max_steps; i++)
606  av_freep(&s->steps[i]);
607 
608  av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band);
609  av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f);
610 
611  return 0;
612 }
int channels
Definition: opus_celt.h:100
float max_delay_ms
Definition: opusenc.h:44
MDCT15Context * mdct[CELT_BLOCK_NB]
Definition: opusenc_psy.h:71
void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
Definition: opusenc_psy.c:254
static int flush_silent_frames(OpusPsyContext *s)
Definition: opusenc_psy.c:184
#define OPUS_SAMPLES_TO_BLOCK_SIZE(x)
Definition: opusenc.h:41
#define NULL
Definition: coverity.c:32
AVCodecContext * avctx
Definition: opusenc_psy.h:55
int64_t total_packets_out
Definition: opusenc_psy.h:80
int anticollapse
Definition: opus_celt.h:118
enum OpusBandwidth bandwidth
Definition: opusenc.h:49
struct FFBufQueue * bufqueue
Definition: opusenc_psy.h:57
#define OPUS_RC_CHECKPOINT_SPAWN(rc)
Definition: opus_rc.h:116
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
int framebits
Definition: opus_celt.h:132
float * window[CELT_BLOCK_NB]
Definition: opusenc_psy.h:70
void ff_opus_rc_enc_init(OpusRangeCoder *rc)
Definition: opus_rc.c:402
int remaining2
Definition: opus_celt.h:134
float coeffs[CELT_MAX_FRAME_SIZE]
Definition: opus_celt.h:75
int64_t bit_rate
the average bitrate
Definition: avcodec.h:1615
OpusPsyStep * steps[FF_BUFQUEUE_SIZE+1]
Definition: opusenc_psy.h:67
FFBesselFilter bfilter_hi[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:65
FFBesselFilter bfilter_lo[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:64
const uint8_t ff_celt_freq_bands[]
Definition: opustab.c:763
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0f/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(INT64_C(1)<< 63)))#define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};static void cpy1(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, len);}static void cpy2(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 2 *len);}static void cpy4(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 4 *len);}static void cpy8(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;}}if(HAVE_X86ASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;}void swri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len){int ch;int off=0;const int os=(out->planar?1:out->ch_count)*out->bps;unsigned 
misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){int planes=in->planar?in->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){int planes=out->planar?out->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){int planes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
static void generate_window_func(float *lut, int N, int win_func, float *overlap)
Definition: window_func.h:36
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define OPUS_MAX_PACKET_SIZE
Definition: opus_rc.h:29
int pf_period
Definition: opus_celt.h:127
uint8_t base
Definition: vp3data.h:202
Structure holding the queue.
Definition: bufferqueue.h:49
const uint8_t ff_celt_band_end[]
Definition: opustab.c:27
float coeffs[OPUS_MAX_CHANNELS][OPUS_BLOCK_SIZE(CELT_BLOCK_960)]
Definition: opusenc_psy.h:40
CeltBlock block[2]
Definition: opus_celt.h:97
OpusEncOptions * options
Definition: opusenc_psy.h:58
static float bessel_filter(FFBesselFilter *s, float x)
Definition: opusenc_utils.h:76
#define av_cold
Definition: attributes.h:82
#define CELT_OVERLAP
Definition: opus.h:42
#define av_malloc(s)
int silence
Definition: opus_celt.h:116
int * inflection_points
Definition: opusenc_psy.h:90
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
#define f(width, name)
Definition: cbs_vp9.c:255
float stereo[CELT_MAX_BANDS]
Definition: opusenc_psy.h:35
void(* mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
Definition: mdct15.h:49
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats...
Definition: float_dsp.h:38
int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
Definition: opusenc_psy.c:223
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
Definition: mdct15.c:247
int dual_stereo
Definition: opus_celt.h:120
int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
Definition: opusenc_psy.c:455
enum OpusMode mode
Definition: opusenc.h:48
#define lrintf(x)
Definition: libm_mips.h:70
int coded_bands
Definition: opus_celt.h:107
int skip_band_floor
Definition: opus_celt.h:110
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
#define cm
Definition: dvbsubdec.c:37
int end_band
Definition: opus_celt.h:106
#define expf(x)
Definition: libm.h:283
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
int alloc_boost[CELT_MAX_BANDS]
Definition: opus_celt.h:113
static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
Definition: opusenc_psy.c:363
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
AVFloatDSPContext * dsp
Definition: opusenc_psy.h:56
#define OPUS_BLOCK_SIZE(x)
Definition: opusenc.h:39
int start_band
Definition: opus_celt.h:105
static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, float *bits, float lambda)
Definition: opusenc_psy.c:28
#define OPUS_RC_CHECKPOINT_ROLLBACK(rc)
Definition: opus_rc.h:123
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:1645
av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, struct FFBufQueue *bufqueue, OpusEncOptions *options)
Definition: opusenc_psy.c:516
#define FF_BUFQUEUE_SIZE
int tf_change[CELT_MAX_BANDS]
Definition: opus_celt.h:139
uint8_t bits
Definition: vp3data.h:202
float total_change
Definition: opusenc_psy.h:37
int pulses[CELT_MAX_BANDS]
Definition: opus_celt.h:138
int pfilter
Definition: opus_celt.h:109
#define FFMAX(a, b)
Definition: common.h:94
int anticollapse_needed
Definition: opus_celt.h:117
#define fail()
Definition: checkasm.h:120
float pf_gain
Definition: opus_celt.h:129
static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
Definition: opusenc_psy.c:382
float * bands[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:39
#define b
Definition: input.c:41
const int8_t ff_celt_tf_select[4][2][2][2]
Definition: opustab.c:777
static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f_out)
Definition: opusenc_psy.c:310
#define Y
Definition: boxblur.h:38
static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
Definition: opusenc_psy.c:421
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:908
QUANT_FN * quant_band
Definition: opus_pvq.h:40
#define FFMIN(a, b)
Definition: common.h:96
float tone[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:34
float change_amp[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:36
#define OPUS_RC_CHECKPOINT_BITS(rc)
Definition: opus_rc.h:120
static void psy_output_groups(OpusPsyContext *s)
Definition: opusenc_psy.c:206
float avg_is_band
Definition: opusenc_psy.h:78
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define s(width, name)
Definition: cbs_vp9.c:257
int blocks
Definition: opus_celt.h:114
int inflection_points_count
Definition: opusenc_psy.h:91
int transient
Definition: opus_celt.h:108
const uint8_t ff_celt_freq_range[]
Definition: opustab.c:767
#define AV_LOG_INFO
Standard information.
Definition: log.h:187
int frame_size
Definition: mxfenc.c:2216
#define CELT_MAX_BANDS
Definition: opus.h:45
int sample_rate
samples per second
Definition: avcodec.h:2225
int pf_tapset
Definition: opus_celt.h:128
main external API structure.
Definition: avcodec.h:1565
static int bessel_init(FFBesselFilter *s, float n, float f0, float fs, int highpass)
Definition: opusenc_utils.h:69
The official guide to swscale for confused that consecutive non overlapping rectangles of slice_bottom special converter These generally are unscaled converters of common like for each output line the vertical scaler pulls lines from a ring buffer When the ring buffer does not contain the wanted then it is pulled from the input slice through the input converter and horizontal scaler The result is also stored in the ring buffer to serve future vertical scaler requests When no more output can be generated because lines from a future slice would be then all remaining lines in the current slice are horizontally scaled and put in the ring buffer[This is done for luma and chroma, each with possibly different numbers of lines per picture.] Input to YUV Converter When the input to the main path is not planar bits per component YUV or bit it is converted to planar bit YUV Two sets of converters exist for this the other leaves the full chroma resolution
Definition: swscale.txt:33
void * buf
Definition: avisynth_c.h:766
void ff_opus_psy_signal_eof(OpusPsyContext *s)
Definition: opusenc_psy.c:588
float scratch[2048]
Definition: opusenc_psy.h:74
int index
Definition: gxfenc.c:89
static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
Definition: opusenc_psy.c:398
int64_t dual_stereo_used
Definition: opusenc_psy.h:79
OpusPacketInfo p
Definition: opusenc_psy.h:84
void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode)
Definition: opus.c:555
av_cold int ff_opus_psy_end(OpusPsyContext *s)
Definition: opusenc_psy.c:593
CeltPVQ * pvq
Definition: opus_celt.h:98
void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc)
Definition: opusenc_psy.c:479
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1562
int framesize
Definition: opusenc.h:50
uint8_t level
Definition: svq3.c:207
int remaining
Definition: opus_celt.h:133
int
const OptionDef options[]
Definition: ffmpeg_opt.c:3364
enum CeltSpread spread
Definition: opus_celt.h:123
av_cold void ff_mdct15_uninit(MDCT15Context **ps)
Definition: mdct15.c:43
int tf_select
Definition: opus_celt.h:111
int len
static void step_collect_psy_metrics(OpusPsyContext *s, int index)
Definition: opusenc_psy.c:79
int channels
number of audio channels
Definition: avcodec.h:2226
#define av_freep(p)
void INT64 start
Definition: avisynth_c.h:766
float energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:33
static int64_t fsize(FILE *f)
Definition: audiomatch.c:28
static void search_for_change_points(OpusPsyContext *s, float tgt_change, int offset_s, int offset_e, int resolution, int level)
Definition: opusenc_psy.c:164
enum CeltBlockSize size
Definition: opus_celt.h:104
int alloc_trim
Definition: opus_celt.h:112
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:342
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:361
for(j=16;j >0;--j)
int pf_octave
Definition: opus_celt.h:126
int intensity_stereo
Definition: opus_celt.h:119
OpusBandExcitation ex[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: opusenc_psy.h:63
static av_always_inline uint32_t opus_rc_tell_frac(const OpusRangeCoder *rc)
Definition: opus_rc.h:66
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
static uint8_t tmp[11]
Definition: aes_ctr.c:26
int steps_to_process
Definition: opusenc_psy.h:87