/*
 * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * tempo scaling audio filter -- an implementation of the WSOLA algorithm
 *
 * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
 * from the Apprentice Video player by Pavel Koshevoy.
 * https://sourceforge.net/projects/apprenticevideo/
 *
 * An explanation of the SOLA algorithm is available at
 * http://www.surina.net/article/time-and-pitch-scaling.html
 *
 * WSOLA is very similar to SOLA; only one major difference exists between
 * the two algorithms.  SOLA shifts audio fragments along the output stream,
 * whereas WSOLA shifts audio fragments along the input stream.
 *
 * The advantage of the WSOLA algorithm is that the overlap region size is
 * always the same, therefore the blending function is constant and
 * can be precomputed.
 */
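
/*
 * Typical usage (the accepted range is set by YAE_ATEMPO_MIN/MAX below,
 * i.e. [0.5, 100.0] in this version):
 *
 *     ffmpeg -i input.wav -filter:a "atempo=1.5" output.wav
 *
 * Values above 1.0 speed playback up, values below 1.0 slow it down;
 * pitch is preserved because fragments are realigned and cross-faded
 * rather than resampled.
 */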

#include <float.h>
#include "libavcodec/avfft.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/eval.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"

/**
 * A fragment of audio waveform
 */
typedef struct AudioFragment {
    // index of the first sample of this fragment in the overall waveform;
    // 0: input sample position
    // 1: output sample position
    int64_t position[2];

    // original packed multi-channel samples:
    uint8_t *data;

    // number of samples in this fragment:
    int nsamples;

    // rDFT transform of the down-mixed mono fragment, used for
    // fast waveform alignment via correlation in frequency domain:
    FFTSample *xdat;
} AudioFragment;

/**
 * Filter state machine states
 */
typedef enum {
    YAE_LOAD_FRAGMENT = 0,
    YAE_ADJUST_POSITION = 1,
    YAE_RELOAD_FRAGMENT = 2,
    YAE_OUTPUT_OVERLAP_ADD = 3,
    YAE_FLUSH_OUTPUT = 4,
} FilterState;
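
/*
 * The states form a simple cycle, driven by yae_apply() below:
 * YAE_LOAD_FRAGMENT -> YAE_ADJUST_POSITION -> (YAE_RELOAD_FRAGMENT, if
 * the position was corrected) -> YAE_OUTPUT_OVERLAP_ADD -> back to
 * YAE_LOAD_FRAGMENT.  YAE_FLUSH_OUTPUT is entered only from yae_flush()
 * at end of stream.
 */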

/**
 * Filter state machine
 */
typedef struct ATempoContext {
    const AVClass *class;

    // ring-buffer of input samples, necessary because sometimes
    // the input fragment position may be adjusted backwards:
    uint8_t *buffer;

    // ring-buffer maximum capacity, expressed in sample rate time base:
    int ring;

    // ring-buffer housekeeping:
    int size;
    int head;
    int tail;

    // 0: input sample position corresponding to the ring buffer tail
    // 1: output sample position
    int64_t position[2];

    // first input timestamp, all other timestamps are offset by this one
    int64_t start_pts;

    // sample format:
    enum AVSampleFormat format;

    // number of channels:
    int channels;

    // number of bytes to skip from one sample to the next, across all
    // channels; stride = (number-of-channels * bits-per-sample-per-channel) / 8
    int stride;

    // fragment window size, power-of-two integer:
    int window;

    // Hann window coefficients, for feathering
    // (blending) the overlapping fragment region:
    float *hann;

    // tempo scaling factor:
    double tempo;

    // a snapshot of previous fragment input and output position values
    // captured when the tempo scale factor was set most recently:
    int64_t origin[2];

    // current/previous fragment ring-buffer:
    AudioFragment frag[2];

    // current fragment index:
    uint64_t nfrag;

    // current state:
    FilterState state;

    // for fast correlation calculation in frequency domain:
    RDFTContext *real_to_complex;
    RDFTContext *complex_to_real;
    FFTSample *correlation;

    // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
    AVFrame *dst_buffer;
    uint8_t *dst;
    uint8_t *dst_end;
    uint64_t nsamples_in;
    uint64_t nsamples_out;
} ATempoContext;

#define YAE_ATEMPO_MIN 0.5
#define YAE_ATEMPO_MAX 100.0

#define OFFSET(x) offsetof(ATempoContext, x)

static const AVOption atempo_options[] = {
    { "tempo", "set tempo scale factor",
      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
      YAE_ATEMPO_MIN,
      YAE_ATEMPO_MAX,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM },
    { NULL }
};

AVFILTER_DEFINE_CLASS(atempo);

static AudioFragment *yae_curr_frag(ATempoContext *atempo)
{
    return &atempo->frag[atempo->nfrag % 2];
}

static AudioFragment *yae_prev_frag(ATempoContext *atempo)
{
    return &atempo->frag[(atempo->nfrag + 1) % 2];
}

/**
 * Reset filter to initial state, do not deallocate existing local buffers.
 */
static void yae_clear(ATempoContext *atempo)
{
    atempo->size = 0;
    atempo->head = 0;
    atempo->tail = 0;

    atempo->nfrag = 0;
    atempo->state = YAE_LOAD_FRAGMENT;
    atempo->start_pts = AV_NOPTS_VALUE;

    atempo->position[0] = 0;
    atempo->position[1] = 0;

    atempo->origin[0] = 0;
    atempo->origin[1] = 0;

    atempo->frag[0].position[0] = 0;
    atempo->frag[0].position[1] = 0;
    atempo->frag[0].nsamples = 0;

    atempo->frag[1].position[0] = 0;
    atempo->frag[1].position[1] = 0;
    atempo->frag[1].nsamples = 0;

    // shift left position of 1st fragment by half a window
    // so that no re-normalization would be required for
    // the left half of the 1st fragment:
    atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
    atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);

    av_frame_free(&atempo->dst_buffer);
    atempo->dst = NULL;
    atempo->dst_end = NULL;

    atempo->nsamples_in = 0;
    atempo->nsamples_out = 0;
}

/**
 * Reset filter to initial state and deallocate all buffers.
 */
static void yae_release_buffers(ATempoContext *atempo)
{
    yae_clear(atempo);

    av_freep(&atempo->frag[0].data);
    av_freep(&atempo->frag[1].data);
    av_freep(&atempo->frag[0].xdat);
    av_freep(&atempo->frag[1].xdat);

    av_freep(&atempo->buffer);
    av_freep(&atempo->hann);
    av_freep(&atempo->correlation);

    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;
}

/* av_realloc is not aligned enough; fortunately, the data does not need to
 * be preserved */
#define RE_MALLOC_OR_FAIL(field, field_size)    \
    do {                                        \
        av_freep(&field);                       \
        field = av_malloc(field_size);          \
        if (!field) {                           \
            yae_release_buffers(atempo);        \
            return AVERROR(ENOMEM);             \
        }                                       \
    } while (0)

/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t nlevels = 0;
    uint32_t pot;
    int i;

    atempo->format = format;
    atempo->channels = channels;
    atempo->stride = sample_size * channels;

    // pick a segment window size:
    atempo->window = sample_rate / 24;

    // adjust window size to be a power-of-two integer:
    nlevels = av_log2(atempo->window);
    pot = 1 << nlevels;
    av_assert0(pot <= atempo->window);

    if (pot < atempo->window) {
        atempo->window = pot * 2;
        nlevels++;
    }

    // initialize audio fragment buffers:
    RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));

    // initialize rDFT contexts:
    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;

    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));

    atempo->ring = atempo->window * 3;
    RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);

    // initialize the Hann window function:
    RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
        atempo->hann[i] = (float)h;
    }

    yae_clear(atempo);
    return 0;
}
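
/*
 * Worked example of the sizing logic above: at 44100 Hz the initial
 * window is 44100 / 24 = 1837 samples; av_log2(1837) = 10 and
 * 1 << 10 = 1024 < 1837, so the window is rounded up to 2048 samples
 * (nlevels = 11).  The rDFTs are then created with nlevels + 1 = 12 bits,
 * i.e. a 4096-point transform -- twice the window -- matching the
 * window * sizeof(FFTComplex) allocation of the xdat buffers, presumably
 * so the circular correlation of two zero-padded window-sized fragments
 * does not wrap around.
 */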

static int yae_set_tempo(AVFilterContext *ctx, const char *arg_tempo)
{
    const AudioFragment *prev;
    ATempoContext *atempo = ctx->priv;
    char *tail = NULL;
    double tempo = av_strtod(arg_tempo, &tail);

    if (tail && *tail) {
        av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo);
        return AVERROR(EINVAL);
    }

    if (tempo < YAE_ATEMPO_MIN || tempo > YAE_ATEMPO_MAX) {
        av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [%f, %f] range\n",
               tempo, YAE_ATEMPO_MIN, YAE_ATEMPO_MAX);
        return AVERROR(EINVAL);
    }

    prev = yae_prev_frag(atempo);
    atempo->origin[0] = prev->position[0] + atempo->window / 2;
    atempo->origin[1] = prev->position[1] + atempo->window / 2;
    atempo->tempo = tempo;
    return 0;
}

/**
 * A helper macro for initializing complex data buffer with scalar data
 * of a given type.
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        FFTSample *xdat = frag->xdat;                                   \
        scalar_type tmp;                                                \
                                                                        \
        if (atempo->channels == 1) {                                    \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat = (FFTSample)tmp;                                 \
            }                                                           \
        } else {                                                        \
            FFTSample s, max, ti, si;                                   \
            int i;                                                      \
                                                                        \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                max = (FFTSample)tmp;                                   \
                s = FFMIN((FFTSample)scalar_max,                        \
                          (FFTSample)fabsf(max));                       \
                                                                        \
                for (i = 1; i < atempo->channels; i++) {                \
                    tmp = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    ti = (FFTSample)tmp;                                \
                    si = FFMIN((FFTSample)scalar_max,                   \
                               (FFTSample)fabsf(ti));                   \
                                                                        \
                    if (s < si) {                                       \
                        s = si;                                         \
                        max = ti;                                       \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat = max;                                            \
            }                                                           \
        }                                                               \
    } while (0)

/**
 * Initialize complex data buffer of a given audio fragment
 * with down-mixed mono data of appropriate scalar type.
 */
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
{
    // shortcuts:
    const uint8_t *src = frag->data;

    // init complex data buffer used for FFT and correlation:
    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_init_xdat(uint8_t, 127);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_init_xdat(int16_t, 32767);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_init_xdat(int, 2147483647);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_init_xdat(float, 1);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_init_xdat(double, 1);
    }
}
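
/*
 * Note: for multi-channel input, yae_init_xdat() does not average the
 * channels into the mono buffer; for every sample it keeps the value of
 * whichever channel has the largest magnitude (magnitudes are compared
 * after clamping to scalar_max).  Presumably this preserves the dominant
 * transient for alignment purposes regardless of which channel carries it.
 */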

/**
 * Populate the internal data buffer on an as-needed basis.
 *
 * @return
 *   0 if requested data was already available or was successfully loaded,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_data(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end,
                         int64_t stop_here)
{
    // shortcut:
    const uint8_t *src = *src_ref;
    const int read_size = stop_here - atempo->position[0];

    if (stop_here <= atempo->position[0]) {
        return 0;
    }

    // samples are not expected to be skipped, unless tempo is greater than 2:
    av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);

    while (atempo->position[0] < stop_here && src < src_end) {
        int src_samples = (src_end - src) / atempo->stride;

        // load data piece-wise, in order to avoid complicating the logic:
        int nsamples = FFMIN(read_size, src_samples);
        int na;
        int nb;

        nsamples = FFMIN(nsamples, atempo->ring);
        na = FFMIN(nsamples, atempo->ring - atempo->tail);
        nb = FFMIN(nsamples - na, atempo->ring);

        if (na) {
            uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
            memcpy(a, src, na * atempo->stride);

            src += na * atempo->stride;
            atempo->position[0] += na;

            atempo->size = FFMIN(atempo->size + na, atempo->ring);
            atempo->tail = (atempo->tail + na) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }

        if (nb) {
            uint8_t *b = atempo->buffer;
            memcpy(b, src, nb * atempo->stride);

            src += nb * atempo->stride;
            atempo->position[0] += nb;

            atempo->size = FFMIN(atempo->size + nb, atempo->ring);
            atempo->tail = (atempo->tail + nb) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }
    }

    // pass back the updated source buffer pointer:
    *src_ref = src;

    // sanity check:
    av_assert0(atempo->position[0] <= stop_here);

    return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
}

/**
 * Populate current audio fragment data buffer.
 *
 * @return
 *   0 when the fragment is ready,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_frag(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end)
{
    // shortcuts:
    AudioFragment *frag = yae_curr_frag(atempo);
    uint8_t *dst;
    int64_t missing, start, zeros;
    uint32_t nsamples;
    const uint8_t *a, *b;
    int i0, i1, n0, n1, na, nb;

    int64_t stop_here = frag->position[0] + atempo->window;
    if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
        return AVERROR(EAGAIN);
    }

    // calculate the number of samples we don't have:
    missing =
        stop_here > atempo->position[0] ?
        stop_here - atempo->position[0] : 0;

    nsamples =
        missing < (int64_t)atempo->window ?
        (uint32_t)(atempo->window - missing) : 0;

    // setup the output buffer:
    frag->nsamples = nsamples;
    dst = frag->data;

    start = atempo->position[0] - atempo->size;
    zeros = 0;

    if (frag->position[0] < start) {
        // what we don't have we substitute with zeros:
        zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
        av_assert0(zeros != nsamples);

        memset(dst, 0, zeros * atempo->stride);
        dst += zeros * atempo->stride;
    }

    if (zeros == nsamples) {
        return 0;
    }

    // get the remaining data from the ring buffer:
    na = (atempo->head < atempo->tail ?
          atempo->tail - atempo->head :
          atempo->ring - atempo->head);

    nb = atempo->head < atempo->tail ? 0 : atempo->tail;

    // sanity check:
    av_assert0(nsamples <= zeros + na + nb);

    a = atempo->buffer + atempo->head * atempo->stride;
    b = atempo->buffer;

    i0 = frag->position[0] + zeros - start;
    i1 = i0 < na ? 0 : i0 - na;

    n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
    n1 = nsamples - zeros - n0;

    if (n0) {
        memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
        dst += n0 * atempo->stride;
    }

    if (n1) {
        memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
    }

    return 0;
}

/**
 * Prepare for loading next audio fragment.
 */
static void yae_advance_to_next_frag(ATempoContext *atempo)
{
    const double fragment_step = atempo->tempo * (double)(atempo->window / 2);

    const AudioFragment *prev;
    AudioFragment *frag;

    atempo->nfrag++;
    prev = yae_prev_frag(atempo);
    frag = yae_curr_frag(atempo);

    frag->position[0] = prev->position[0] + (int64_t)fragment_step;
    frag->position[1] = prev->position[1] + atempo->window / 2;
    frag->nsamples = 0;
}
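
/*
 * This is where the time scaling actually happens: the input position
 * advances by tempo * window / 2 samples per fragment, while the output
 * position always advances by window / 2.  For example, with window = 2048
 * and tempo = 2.0, the input moves 2048 samples per fragment but the
 * output only 1024, halving the duration.
 */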

/**
 * Calculate cross-correlation via rDFT.
 *
 * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
 * and transform back via complex_to_real rDFT.
 */
static void yae_xcorr_via_rdft(FFTSample *xcorr,
                               RDFTContext *complex_to_real,
                               const FFTComplex *xa,
                               const FFTComplex *xb,
                               const int window)
{
    FFTComplex *xc = (FFTComplex *)xcorr;
    int i;

    // NOTE: first element requires special care -- given Y = rDFT(X),
    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
    // stores Re(Y[N/2]) in place of Im(Y[0]).

    xc->re = xa->re * xb->re;
    xc->im = xa->im * xb->im;
    xa++;
    xb++;
    xc++;

    for (i = 1; i < window; i++, xa++, xb++, xc++) {
        xc->re = (xa->re * xb->re + xa->im * xb->im);
        xc->im = (xa->im * xb->re - xa->re * xb->im);
    }

    // apply inverse rDFT:
    av_rdft_calc(complex_to_real, xcorr);
}
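
/*
 * Note: the complex multiply above forms xa[i] * conj(xb[i]), so by the
 * correlation theorem
 *
 *     xcorr = IDFT(DFT(a) * conj(DFT(b)))
 *
 * is the circular cross-correlation of the two fragments.  One complex
 * multiply plus one inverse transform replaces an O(N^2) direct
 * correlation with O(N log N) work.
 */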

/**
 * Calculate alignment offset for given fragment
 * relative to the previous fragment.
 *
 * @return alignment offset of current fragment relative to previous.
 */
static int yae_align(AudioFragment *frag,
                     const AudioFragment *prev,
                     const int window,
                     const int delta_max,
                     const int drift,
                     FFTSample *correlation,
                     RDFTContext *complex_to_real)
{
    int best_offset = -drift;
    FFTSample best_metric = -FLT_MAX;
    FFTSample *xcorr;

    int i0;
    int i1;
    int i;

    yae_xcorr_via_rdft(correlation,
                       complex_to_real,
                       (const FFTComplex *)prev->xdat,
                       (const FFTComplex *)frag->xdat,
                       window);

    // identify search window boundaries:
    i0 = FFMAX(window / 2 - delta_max - drift, 0);
    i0 = FFMIN(i0, window);

    i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
    i1 = FFMAX(i1, 0);

    // identify cross-correlation peaks within search window:
    xcorr = correlation + i0;

    for (i = i0; i < i1; i++, xcorr++) {
        FFTSample metric = *xcorr;

        // normalize:
        FFTSample drifti = (FFTSample)(drift + i);
        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);

        if (metric > best_metric) {
            best_metric = metric;
            best_offset = i - window / 2;
        }
    }

    return best_offset;
}
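
/*
 * A note on the "normalize" step above: each raw correlation value is
 * weighted by (drift + i) * (i - i0) * (i1 - i).  The last two factors
 * form a parabola that is zero at the search window edges and largest
 * near its middle, which appears intended to favor peaks well inside
 * the search range over spurious maxima at its boundaries.
 */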

/**
 * Adjust current fragment position for better alignment
 * with previous fragment.
 *
 * @return alignment correction.
 */
static int yae_adjust_position(ATempoContext *atempo)
{
    const AudioFragment *prev = yae_prev_frag(atempo);
    AudioFragment *frag = yae_curr_frag(atempo);

    const double prev_output_position =
        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) *
        atempo->tempo;

    const double ideal_output_position =
        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2);

    const int drift = (int)(prev_output_position - ideal_output_position);

    const int delta_max = atempo->window / 2;
    const int correction = yae_align(frag,
                                     prev,
                                     atempo->window,
                                     delta_max,
                                     drift,
                                     atempo->correlation,
                                     atempo->complex_to_real);

    if (correction) {
        // adjust fragment position:
        frag->position[0] -= correction;

        // clear so that the fragment can be reloaded:
        frag->nsamples = 0;
    }

    return correction;
}

/**
 * A helper macro for blending the overlap region of previous
 * and current audio fragment.
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out     = (scalar_type *)dst;                      \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i;                                                      \
                                                                        \
        for (i = 0; i < overlap && out < out_end;                       \
             i++, atempo->position[1]++, wa++, wb++) {                  \
            float w0 = *wa;                                             \
            float w1 = *wb;                                             \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels;                           \
                 j++, aaa++, bbb++, out++) {                            \
                float t0 = (float)*aaa;                                 \
                float t1 = (float)*bbb;                                 \
                                                                        \
                *out =                                                  \
                    frag->position[0] + i < 0 ?                         \
                    *aaa :                                              \
                    (scalar_type)(t0 * w0 + t1 * w1);                   \
            }                                                           \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)

/**
 * Blend the overlap region of previous and current audio fragment
 * and output the results to the given destination buffer.
 *
 * @return
 *   0 if the overlap region was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_overlap_add(ATempoContext *atempo,
                           uint8_t **dst_ref,
                           uint8_t *dst_end)
{
    // shortcuts:
    const AudioFragment *prev = yae_prev_frag(atempo);
    const AudioFragment *frag = yae_curr_frag(atempo);

    const int64_t start_here = FFMAX(atempo->position[1],
                                     frag->position[1]);

    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
                                    frag->position[1] + frag->nsamples);

    const int64_t overlap = stop_here - start_here;

    const int64_t ia = start_here - prev->position[1];
    const int64_t ib = start_here - frag->position[1];

    const float *wa = atempo->hann + ia;
    const float *wb = atempo->hann + ib;

    const uint8_t *a = prev->data + ia * atempo->stride;
    const uint8_t *b = frag->data + ib * atempo->stride;

    uint8_t *dst = *dst_ref;

    av_assert0(start_here <= stop_here &&
               frag->position[1] <= start_here &&
               overlap <= frag->nsamples);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_blend(uint8_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_blend(int16_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_blend(int);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_blend(float);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_blend(double);
    }

    // pass back the updated destination buffer pointer:
    *dst_ref = dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}
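
/*
 * Worked example of the overlap arithmetic: with window = 2048 and the
 * previous fragment at output position 0, the current fragment starts at
 * output position 1024 (window / 2).  Both hold 2048 samples, so the
 * blended region is [1024, 2048) -- exactly half a window -- with the
 * previous fragment faded out by the falling half of the Hann curve and
 * the current one faded in by the rising half.
 */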

/**
 * Feed as much data to the filter as it is able to consume
 * and receive as much processed data in the destination buffer
 * as it is able to produce or store.
 */
static void
yae_apply(ATempoContext *atempo,
          const uint8_t **src_ref,
          const uint8_t *src_end,
          uint8_t **dst_ref,
          uint8_t *dst_end)
{
    while (1) {
        if (atempo->state == YAE_LOAD_FRAGMENT) {
            // load additional data for the current fragment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            // must load the second fragment before alignment can start:
            if (!atempo->nfrag) {
                yae_advance_to_next_frag(atempo);
                continue;
            }

            atempo->state = YAE_ADJUST_POSITION;
        }

        if (atempo->state == YAE_ADJUST_POSITION) {
            // adjust position for better alignment:
            if (yae_adjust_position(atempo)) {
                // reload the fragment at the corrected position, so that the
                // Hann window blending would not require normalization:
                atempo->state = YAE_RELOAD_FRAGMENT;
            } else {
                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
            }
        }

        if (atempo->state == YAE_RELOAD_FRAGMENT) {
            // load additional data if necessary due to position adjustment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
        }

        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
            // overlap-add and output the result:
            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
                break;
            }

            // advance to the next fragment, repeat:
            yae_advance_to_next_frag(atempo);
            atempo->state = YAE_LOAD_FRAGMENT;
        }
    }
}

/**
 * Flush any buffered data from the filter.
 *
 * @return
 *   0 if all data was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_flush(ATempoContext *atempo,
                     uint8_t **dst_ref,
                     uint8_t *dst_end)
{
    AudioFragment *frag = yae_curr_frag(atempo);
    int64_t overlap_end;
    int64_t start_here;
    int64_t stop_here;
    int64_t offset;

    const uint8_t *src;
    uint8_t *dst;

    int src_size;
    int dst_size;
    int nbytes;

    atempo->state = YAE_FLUSH_OUTPUT;

    if (!atempo->nfrag) {
        // there is nothing to flush:
        return 0;
    }

    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
        atempo->position[1] == frag->position[1] + frag->nsamples) {
        // the current fragment is already flushed:
        return 0;
    }

    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        // finish loading the current (possibly partial) fragment:
        yae_load_frag(atempo, NULL, NULL);

        if (atempo->nfrag) {
            // down-mix to mono:
            yae_downmix(atempo, frag);

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, frag->xdat);

            // align current fragment to previous fragment:
            if (yae_adjust_position(atempo)) {
                // reload the current fragment due to adjusted position:
                yae_load_frag(atempo, NULL, NULL);
            }
        }
    }

    // flush the overlap region:
    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
                                            frag->nsamples);

    while (atempo->position[1] < overlap_end) {
        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
            return AVERROR(EAGAIN);
        }
    }

    // check whether all of the input samples have been consumed:
    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        yae_advance_to_next_frag(atempo);
        return AVERROR(EAGAIN);
    }

    // flush the remainder of the current fragment:
    start_here = FFMAX(atempo->position[1], overlap_end);
    stop_here  = frag->position[1] + frag->nsamples;
    offset     = start_here - frag->position[1];
    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);

    src = frag->data + offset * atempo->stride;
    dst = (uint8_t *)*dst_ref;

    src_size = (int)(stop_here - start_here) * atempo->stride;
    dst_size = dst_end - dst;
    nbytes = FFMIN(src_size, dst_size);

    memcpy(dst, src, nbytes);
    dst += nbytes;

    atempo->position[1] += (nbytes / atempo->stride);

    // pass back the updated destination buffer pointer:
    *dst_ref = (uint8_t *)dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

static av_cold int init(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    atempo->format = AV_SAMPLE_FMT_NONE;
    atempo->state = YAE_LOAD_FRAGMENT;
    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    yae_release_buffers(atempo);
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterFormats *formats = NULL;

    // WSOLA necessitates an internal sliding window ring buffer
    // for the incoming audio stream.
    //
    // Planar sample formats are too cumbersome to store in a ring buffer,
    // therefore planar sample formats are not supported.
    //
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_U8,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_S32,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts) {
        return AVERROR(ENOMEM);
    }
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    formats = ff_all_samplerates();
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    return ff_set_common_samplerates(ctx, formats);
}

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;

    enum AVSampleFormat format = inlink->format;
    int sample_rate = (int)inlink->sample_rate;

    return yae_reset(atempo, format, sample_rate, inlink->channels);
}

static int push_samples(ATempoContext *atempo,
                        AVFilterLink *outlink,
                        int n_out)
{
    int ret;

    atempo->dst_buffer->sample_rate = outlink->sample_rate;
    atempo->dst_buffer->nb_samples  = n_out;

    // adjust the PTS:
    atempo->dst_buffer->pts = atempo->start_pts +
        av_rescale_q(atempo->nsamples_out,
                     (AVRational){ 1, outlink->sample_rate },
                     outlink->time_base);

    ret = ff_filter_frame(outlink, atempo->dst_buffer);
    atempo->dst_buffer = NULL;
    atempo->dst        = NULL;
    atempo->dst_end    = NULL;
    if (ret < 0)
        return ret;

    atempo->nsamples_out += n_out;
    return 0;
}
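
/*
 * The PTS of each output frame is derived from the number of samples
 * produced so far, not from input timestamps.  For example, with an
 * output time_base of 1/44100 (typical for a 44100 Hz link), a frame
 * pushed after 88200 output samples gets pts = start_pts + 88200 --
 * exactly 2 seconds after the first frame, regardless of the tempo.
 */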

static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];

    int ret = 0;
    int n_in = src_buffer->nb_samples;
    int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);

    const uint8_t *src = src_buffer->data[0];
    const uint8_t *src_end = src + n_in * atempo->stride;

    if (atempo->start_pts == AV_NOPTS_VALUE)
        atempo->start_pts = av_rescale_q(src_buffer->pts,
                                         inlink->time_base,
                                         outlink->time_base);

    while (src < src_end) {
        if (!atempo->dst_buffer) {
            atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
            if (!atempo->dst_buffer) {
                av_frame_free(&src_buffer);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(atempo->dst_buffer, src_buffer);

            atempo->dst = atempo->dst_buffer->data[0];
            atempo->dst_end = atempo->dst + n_out * atempo->stride;
        }

        yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);

        if (atempo->dst == atempo->dst_end) {
            int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
                             atempo->stride);
            ret = push_samples(atempo, outlink, n_samples);
            if (ret < 0)
                goto end;
        }
    }

    atempo->nsamples_in += n_in;
end:
    av_frame_free(&src_buffer);
    return ret;
}
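
/*
 * Sizing example for the n_out estimate above: a 1024-sample input frame
 * at tempo 2.0 yields n_out = (int)(0.5 + 1024 / 2.0) = 512, while at
 * tempo 0.5 it yields 2048.  The estimate only sizes the destination
 * buffer; the WSOLA state machine decides how many samples are actually
 * written, and partial buffers are carried over in atempo->dst_buffer.
 */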

static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ATempoContext *atempo = ctx->priv;
    int ret;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF) {
        // flush the filter:
        int n_max = atempo->ring;
        int n_out;
        int err = AVERROR(EAGAIN);

        while (err == AVERROR(EAGAIN)) {
            if (!atempo->dst_buffer) {
                atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
                if (!atempo->dst_buffer)
                    return AVERROR(ENOMEM);

                atempo->dst = atempo->dst_buffer->data[0];
                atempo->dst_end = atempo->dst + n_max * atempo->stride;
            }

            err = yae_flush(atempo, &atempo->dst, atempo->dst_end);

            n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
                     atempo->stride);

            if (n_out) {
                ret = push_samples(atempo, outlink, n_out);
                if (ret < 0)
                    return ret;
            }
        }

        av_frame_free(&atempo->dst_buffer);
        atempo->dst = NULL;
        atempo->dst_end = NULL;

        return AVERROR_EOF;
    }

    return ret;
}

static int process_command(AVFilterContext *ctx,
                           const char *cmd,
                           const char *arg,
                           char *res,
                           int res_len,
                           int flags)
{
    return !strcmp(cmd, "tempo") ? yae_set_tempo(ctx, arg) : AVERROR(ENOSYS);
}

static const AVFilterPad atempo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_props,
    },
    { NULL }
};

static const AVFilterPad atempo_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};

AVFilter ff_af_atempo = {
    .name            = "atempo",
    .description     = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init            = init,
    .uninit          = uninit,
    .query_formats   = query_formats,
    .process_command = process_command,
    .priv_size       = sizeof(ATempoContext),
    .priv_class      = &atempo_class,
    .inputs          = atempo_inputs,
    .outputs         = atempo_outputs,
};