FFmpeg
af_axcorrelate.c
/*
 * Copyright (c) 2019 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/audio_fifo.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"

#include "audio.h"
#include "avfilter.h"
#include "formats.h"
#include "filters.h"
#include "internal.h"

typedef struct AudioXCorrelateContext {
    const AVClass *class;

    int size;
    int algo;
    int64_t pts;

    AVAudioFifo *fifo[2];
    AVFrame *cache[2];
    AVFrame *mean_sum[2];
    AVFrame *num_sum;
    AVFrame *den_sum[2];
    int used;

    int (*xcorrelate)(AVFilterContext *ctx, AVFrame *out);
} AudioXCorrelateContext;

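/* Both inputs and the output are constrained to planar float samples, with
 * channel counts and sample rates negotiated to common values on all links. */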
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    };
    int ret = ff_set_common_all_channel_counts(ctx);
    if (ret < 0)
        return ret;

    ret = ff_set_common_formats_from_list(ctx, sample_fmts);
    if (ret < 0)
        return ret;

    return ff_set_common_all_samplerates(ctx);
}

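/* Sum of the samples in one analysis window; the caller divides by the window
 * size to obtain the mean. */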
static float mean_sum(const float *in, int size)
{
    float mean_sum = 0.f;

    for (int i = 0; i < size; i++)
        mean_sum += in[i];

    return mean_sum;
}

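/* Sum of the element-wise products of two windows; with x == y this is the
 * energy of the window. */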
static float square_sum(const float *x, const float *y, int size)
{
    float square_sum = 0.f;

    for (int i = 0; i < size; i++)
        square_sum += x[i] * y[i];

    return square_sum;
}

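/* Normalized cross-correlation of two windows at zero lag: the covariance of
 * x and y divided by the product of their standard deviations (a Pearson
 * correlation coefficient in [-1, 1]). Returns 0 when the denominator is too
 * small to be meaningful. */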
static float xcorrelate(const float *x, const float *y, float sumx, float sumy, int size)
{
    const float xm = sumx / size, ym = sumy / size;
    float num = 0.f, den, den0 = 0.f, den1 = 0.f;

    for (int i = 0; i < size; i++) {
        float xd = x[i] - xm;
        float yd = y[i] - ym;

        num += xd * yd;
        den0 += xd * xd;
        den1 += yd * yd;
    }

    num /= size;
    den = sqrtf((den0 * den1) / (size * size));

    return den <= 1e-6f ? 0.f : num / den;
}

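/* "slow" algorithm: the window means are kept as running sums, but the
 * centered products are recomputed from scratch for every output sample. */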
static int xcorrelate_slow(AVFilterContext *ctx, AVFrame *out)
{
    AudioXCorrelateContext *s = ctx->priv;
    const int size = s->size;
    int used;

    for (int ch = 0; ch < out->channels; ch++) {
        const float *x = (const float *)s->cache[0]->extended_data[ch];
        const float *y = (const float *)s->cache[1]->extended_data[ch];
        float *sumx = (float *)s->mean_sum[0]->extended_data[ch];
        float *sumy = (float *)s->mean_sum[1]->extended_data[ch];
        float *dst = (float *)out->extended_data[ch];

        used = s->used;
        if (!used) {
            sumx[0] = mean_sum(x, size);
            sumy[0] = mean_sum(y, size);
            used = 1;
        }

        for (int n = 0; n < out->nb_samples; n++) {
            dst[n] = xcorrelate(x + n, y + n, sumx[0], sumy[0], size);

            sumx[0] -= x[n];
            sumx[0] += x[n + size];
            sumy[0] -= y[n];
            sumy[0] += y[n + size];
        }
    }

    return used;
}

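/* "fast" algorithm: numerator and denominator terms are maintained as running
 * sums over the window, so each output sample costs O(1) instead of O(size).
 * The running energies are clamped at zero to absorb floating-point
 * cancellation. */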
static int xcorrelate_fast(AVFilterContext *ctx, AVFrame *out)
{
    AudioXCorrelateContext *s = ctx->priv;
    const int size = s->size;
    int used;

    for (int ch = 0; ch < out->channels; ch++) {
        const float *x = (const float *)s->cache[0]->extended_data[ch];
        const float *y = (const float *)s->cache[1]->extended_data[ch];
        float *num_sum = (float *)s->num_sum->extended_data[ch];
        float *den_sumx = (float *)s->den_sum[0]->extended_data[ch];
        float *den_sumy = (float *)s->den_sum[1]->extended_data[ch];
        float *dst = (float *)out->extended_data[ch];

        used = s->used;
        if (!used) {
            num_sum[0] = square_sum(x, y, size);
            den_sumx[0] = square_sum(x, x, size);
            den_sumy[0] = square_sum(y, y, size);
            used = 1;
        }

        for (int n = 0; n < out->nb_samples; n++) {
            float num, den;

            num = num_sum[0] / size;
            den = sqrtf((den_sumx[0] * den_sumy[0]) / (size * size));

            dst[n] = den <= 1e-6f ? 0.f : num / den;

            num_sum[0] -= x[n] * y[n];
            num_sum[0] += x[n + size] * y[n + size];
            den_sumx[0] -= x[n] * x[n];
            den_sumx[0] = FFMAX(den_sumx[0], 0.f);
            den_sumx[0] += x[n + size] * x[n + size];
            den_sumy[0] -= y[n] * y[n];
            den_sumy[0] = FFMAX(den_sumy[0], 0.f);
            den_sumy[0] += y[n + size] * y[n + size];
        }
    }

    return used;
}

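/* Both inputs are buffered in audio FIFOs. Once more than "size" samples are
 * available on each input, "available" samples are peeked into the caches,
 * the selected correlation kernel produces available - size output samples,
 * and that many samples are drained from both FIFOs. */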
static int activate(AVFilterContext *ctx)
{
    AudioXCorrelateContext *s = ctx->priv;
    AVFrame *frame = NULL;
    int ret, status;
    int available;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);

    for (int i = 0; i < 2; i++) {
        ret = ff_inlink_consume_frame(ctx->inputs[i], &frame);
        if (ret > 0) {
            if (s->pts == AV_NOPTS_VALUE)
                s->pts = frame->pts;
            ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
                                      frame->nb_samples);
            av_frame_free(&frame);
            if (ret < 0)
                return ret;
        }
    }

    available = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
    if (available > s->size) {
        const int out_samples = available - s->size;
        AVFrame *out;

        if (!s->cache[0] || s->cache[0]->nb_samples < available) {
            av_frame_free(&s->cache[0]);
            s->cache[0] = ff_get_audio_buffer(ctx->outputs[0], available);
            if (!s->cache[0])
                return AVERROR(ENOMEM);
        }

        if (!s->cache[1] || s->cache[1]->nb_samples < available) {
            av_frame_free(&s->cache[1]);
            s->cache[1] = ff_get_audio_buffer(ctx->outputs[0], available);
            if (!s->cache[1])
                return AVERROR(ENOMEM);
        }

        ret = av_audio_fifo_peek(s->fifo[0], (void **)s->cache[0]->extended_data, available);
        if (ret < 0)
            return ret;

        ret = av_audio_fifo_peek(s->fifo[1], (void **)s->cache[1]->extended_data, available);
        if (ret < 0)
            return ret;

        out = ff_get_audio_buffer(ctx->outputs[0], out_samples);
        if (!out)
            return AVERROR(ENOMEM);

        s->used = s->xcorrelate(ctx, out);

        out->pts = s->pts;
        s->pts += out_samples;

        av_audio_fifo_drain(s->fifo[0], out_samples);
        av_audio_fifo_drain(s->fifo[1], out_samples);

        return ff_filter_frame(ctx->outputs[0], out);
    }

    if (av_audio_fifo_size(s->fifo[0]) > s->size &&
        av_audio_fifo_size(s->fifo[1]) > s->size) {
        ff_filter_set_ready(ctx, 10);
        return 0;
    }

    for (int i = 0; i < 2; i++) {
        if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
            ff_outlink_set_status(ctx->outputs[0], status, pts);
            return 0;
        }
    }

    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
        for (int i = 0; i < 2; i++) {
            if (av_audio_fifo_size(s->fifo[i]) > s->size)
                continue;
            ff_inlink_request_frame(ctx->inputs[i]);
            return 0;
        }
    }

    return FFERROR_NOT_READY;
}

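/* The output link copies format and channel count from the first input; the
 * FIFOs and the one-sample-per-channel running-sum buffers are allocated here
 * and the correlation kernel is selected from the "algo" option. */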
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    AudioXCorrelateContext *s = ctx->priv;

    s->pts = AV_NOPTS_VALUE;

    outlink->format = inlink->format;
    outlink->channels = inlink->channels;
    s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size);
    s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size);
    if (!s->fifo[0] || !s->fifo[1])
        return AVERROR(ENOMEM);

    s->mean_sum[0] = ff_get_audio_buffer(outlink, 1);
    s->mean_sum[1] = ff_get_audio_buffer(outlink, 1);
    s->num_sum = ff_get_audio_buffer(outlink, 1);
    s->den_sum[0] = ff_get_audio_buffer(outlink, 1);
    s->den_sum[1] = ff_get_audio_buffer(outlink, 1);
    if (!s->mean_sum[0] || !s->mean_sum[1] || !s->num_sum ||
        !s->den_sum[0] || !s->den_sum[1])
        return AVERROR(ENOMEM);

    switch (s->algo) {
    case 0: s->xcorrelate = xcorrelate_slow; break;
    case 1: s->xcorrelate = xcorrelate_fast; break;
    }

    return 0;
}

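/* Release everything allocated in config_output; av_audio_fifo_free() and
 * av_frame_free() tolerate NULL, so partially configured state is handled. */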
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioXCorrelateContext *s = ctx->priv;

    av_audio_fifo_free(s->fifo[0]);
    av_audio_fifo_free(s->fifo[1]);
    av_frame_free(&s->cache[0]);
    av_frame_free(&s->cache[1]);
    av_frame_free(&s->mean_sum[0]);
    av_frame_free(&s->mean_sum[1]);
    av_frame_free(&s->num_sum);
    av_frame_free(&s->den_sum[0]);
    av_frame_free(&s->den_sum[1]);
}

static const AVFilterPad inputs[] = {
    {
        .name = "axcorrelate0",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    {
        .name = "axcorrelate1",
        .type = AVMEDIA_TYPE_AUDIO,
    },
};

static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_output,
    },
};

#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
#define OFFSET(x) offsetof(AudioXCorrelateContext, x)

static const AVOption axcorrelate_options[] = {
    { "size", "set segment size", OFFSET(size), AV_OPT_TYPE_INT,   {.i64=256}, 2, 131072, AF },
    { "algo", "set algorithm",    OFFSET(algo), AV_OPT_TYPE_INT,   {.i64=0},   0,      1, AF, "algo" },
    { "slow", "slow algorithm",   0,            AV_OPT_TYPE_CONST, {.i64=0},   0,      0, AF, "algo" },
    { "fast", "fast algorithm",   0,            AV_OPT_TYPE_CONST, {.i64=1},   0,      0, AF, "algo" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(axcorrelate);

const AVFilter ff_af_axcorrelate = {
    .name          = "axcorrelate",
    .description   = NULL_IF_CONFIG_SMALL("Cross-correlate two audio streams."),
    .priv_size     = sizeof(AudioXCorrelateContext),
    .priv_class    = &axcorrelate_class,
    .query_formats = query_formats,
    .activate      = activate,
    .uninit        = uninit,
    FILTER_INPUTS(inputs),
    FILTER_OUTPUTS(outputs),
};
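
/* Example invocation (illustrative sketch; the file names are placeholders and
 * not part of this source). Correlate two audio files with a 1024-sample
 * window using the fast running-sum algorithm:
 *
 *   ffmpeg -i ref.wav -i deg.wav -filter_complex axcorrelate=size=1024:algo=fast corr.wav
 *
 * The output stream carries one correlation value per sample and channel,
 * nominally in the range [-1, 1]. */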