00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00031 #include "libavutil/audioconvert.h"
00032 #include "libavutil/audio_fifo.h"
00033 #include "libavutil/avassert.h"
00034 #include "libavutil/avstring.h"
00035 #include "libavutil/mathematics.h"
00036 #include "libavutil/opt.h"
00037 #include "libavutil/samplefmt.h"
00038
00039 #include "audio.h"
00040 #include "avfilter.h"
00041 #include "formats.h"
00042 #include "internal.h"
00043
/* Per-input state values stored in MixContext.input_state[]. */
#define INPUT_OFF         0   /* input reached EOF and is no longer mixed */
#define INPUT_ON          1   /* input is active and gets mixed */
#define INPUT_INACTIVE    2   /* NOTE(review): not referenced by the code in this file view; confirm intended use */

/* Values accepted by the "duration" option (MixContext.duration_mode). */
#define DURATION_LONGEST  0   /* stop when every input has ended */
#define DURATION_SHORTEST 1   /* stop as soon as any input has ended */
#define DURATION_FIRST    2   /* stop when the first input has ended */
00051
00052
/**
 * Size and timestamp of one queued frame; node of a singly linked list.
 */
typedef struct FrameInfo {
    int               nb_samples;   /**< samples of this frame not yet consumed */
    int64_t           pts;          /**< timestamp of the first unconsumed sample */
    struct FrameInfo *next;         /**< following node, NULL for the last one */
} FrameInfo;
00058
00067 typedef struct FrameList {
00068 int nb_frames;
00069 int nb_samples;
00070 FrameInfo *list;
00071 FrameInfo *end;
00072 } FrameList;
00073
00074 static void frame_list_clear(FrameList *frame_list)
00075 {
00076 if (frame_list) {
00077 while (frame_list->list) {
00078 FrameInfo *info = frame_list->list;
00079 frame_list->list = info->next;
00080 av_free(info);
00081 }
00082 frame_list->nb_frames = 0;
00083 frame_list->nb_samples = 0;
00084 frame_list->end = NULL;
00085 }
00086 }
00087
00088 static int frame_list_next_frame_size(FrameList *frame_list)
00089 {
00090 if (!frame_list->list)
00091 return 0;
00092 return frame_list->list->nb_samples;
00093 }
00094
00095 static int64_t frame_list_next_pts(FrameList *frame_list)
00096 {
00097 if (!frame_list->list)
00098 return AV_NOPTS_VALUE;
00099 return frame_list->list->pts;
00100 }
00101
00102 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
00103 {
00104 if (nb_samples >= frame_list->nb_samples) {
00105 frame_list_clear(frame_list);
00106 } else {
00107 int samples = nb_samples;
00108 while (samples > 0) {
00109 FrameInfo *info = frame_list->list;
00110 av_assert0(info != NULL);
00111 if (info->nb_samples <= samples) {
00112 samples -= info->nb_samples;
00113 frame_list->list = info->next;
00114 if (!frame_list->list)
00115 frame_list->end = NULL;
00116 frame_list->nb_frames--;
00117 frame_list->nb_samples -= info->nb_samples;
00118 av_free(info);
00119 } else {
00120 info->nb_samples -= samples;
00121 info->pts += samples;
00122 frame_list->nb_samples -= samples;
00123 samples = 0;
00124 }
00125 }
00126 }
00127 }
00128
00129 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
00130 {
00131 FrameInfo *info = av_malloc(sizeof(*info));
00132 if (!info)
00133 return AVERROR(ENOMEM);
00134 info->nb_samples = nb_samples;
00135 info->pts = pts;
00136 info->next = NULL;
00137
00138 if (!frame_list->list) {
00139 frame_list->list = info;
00140 frame_list->end = info;
00141 } else {
00142 av_assert0(frame_list->end != NULL);
00143 frame_list->end->next = info;
00144 frame_list->end = info;
00145 }
00146 frame_list->nb_frames++;
00147 frame_list->nb_samples += nb_samples;
00148
00149 return 0;
00150 }
00151
00152
00153 typedef struct MixContext {
00154 const AVClass *class;
00156 int nb_inputs;
00157 int active_inputs;
00158 int duration_mode;
00159 float dropout_transition;
00161 int nb_channels;
00162 int sample_rate;
00163 AVAudioFifo **fifos;
00164 uint8_t *input_state;
00165 float *input_scale;
00166 float scale_norm;
00167 int64_t next_pts;
00168 FrameList *frame_list;
00169 } MixContext;
00170
00171 #define OFFSET(x) offsetof(MixContext, x)
00172 #define A AV_OPT_FLAG_AUDIO_PARAM
00173 static const AVOption options[] = {
00174 { "inputs", "Number of inputs.",
00175 OFFSET(nb_inputs), AV_OPT_TYPE_INT, { 2 }, 1, 32, A },
00176 { "duration", "How to determine the end-of-stream.",
00177 OFFSET(duration_mode), AV_OPT_TYPE_INT, { DURATION_LONGEST }, 0, 2, A, "duration" },
00178 { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { DURATION_LONGEST }, INT_MIN, INT_MAX, A, "duration" },
00179 { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { DURATION_SHORTEST }, INT_MIN, INT_MAX, A, "duration" },
00180 { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { DURATION_FIRST }, INT_MIN, INT_MAX, A, "duration" },
00181 { "dropout_transition", "Transition time, in seconds, for volume "
00182 "renormalization when an input stream ends.",
00183 OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { 2.0 }, 0, INT_MAX, A },
00184 { NULL },
00185 };
00186
00187 static const AVClass amix_class = {
00188 .class_name = "amix filter",
00189 .item_name = av_default_item_name,
00190 .option = options,
00191 .version = LIBAVUTIL_VERSION_INT,
00192 };
00193
00194
00202 static void calculate_scales(MixContext *s, int nb_samples)
00203 {
00204 int i;
00205
00206 if (s->scale_norm > s->active_inputs) {
00207 s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
00208 s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
00209 }
00210
00211 for (i = 0; i < s->nb_inputs; i++) {
00212 if (s->input_state[i] == INPUT_ON)
00213 s->input_scale[i] = 1.0f / s->scale_norm;
00214 else
00215 s->input_scale[i] = 0.0f;
00216 }
00217 }
00218
00219 static int config_output(AVFilterLink *outlink)
00220 {
00221 AVFilterContext *ctx = outlink->src;
00222 MixContext *s = ctx->priv;
00223 int i;
00224 char buf[64];
00225
00226 s->sample_rate = outlink->sample_rate;
00227 outlink->time_base = (AVRational){ 1, outlink->sample_rate };
00228 s->next_pts = AV_NOPTS_VALUE;
00229
00230 s->frame_list = av_mallocz(sizeof(*s->frame_list));
00231 if (!s->frame_list)
00232 return AVERROR(ENOMEM);
00233
00234 s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
00235 if (!s->fifos)
00236 return AVERROR(ENOMEM);
00237
00238 s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
00239 for (i = 0; i < s->nb_inputs; i++) {
00240 s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
00241 if (!s->fifos[i])
00242 return AVERROR(ENOMEM);
00243 }
00244
00245 s->input_state = av_malloc(s->nb_inputs);
00246 if (!s->input_state)
00247 return AVERROR(ENOMEM);
00248 memset(s->input_state, INPUT_ON, s->nb_inputs);
00249 s->active_inputs = s->nb_inputs;
00250
00251 s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
00252 if (!s->input_scale)
00253 return AVERROR(ENOMEM);
00254 s->scale_norm = s->active_inputs;
00255 calculate_scales(s, 0);
00256
00257 av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
00258
00259 av_log(ctx, AV_LOG_VERBOSE,
00260 "inputs:%d fmt:%s srate:%"PRId64" cl:%s\n", s->nb_inputs,
00261 av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
00262
00263 return 0;
00264 }
00265
00266
/**
 * Multiply-accumulate: dst[i] += src[i] * mul for the first len elements.
 *
 * @param dst accumulator array, updated in place
 * @param src input array
 * @param mul factor applied to every source element
 * @param len number of elements to process; nothing is done if len <= 0
 */
static void vector_fmac_scalar(float *dst, const float *src, float mul, int len)
{
    const float *const src_end = src + (len > 0 ? len : 0);

    while (src < src_end)
        *dst++ += *src++ * mul;
}
00273
00277 static int output_frame(AVFilterLink *outlink, int nb_samples)
00278 {
00279 AVFilterContext *ctx = outlink->src;
00280 MixContext *s = ctx->priv;
00281 AVFilterBufferRef *out_buf, *in_buf;
00282 int i;
00283
00284 calculate_scales(s, nb_samples);
00285
00286 out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00287 if (!out_buf)
00288 return AVERROR(ENOMEM);
00289
00290 in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
00291 if (!in_buf)
00292 return AVERROR(ENOMEM);
00293
00294 for (i = 0; i < s->nb_inputs; i++) {
00295 if (s->input_state[i] == INPUT_ON) {
00296 av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
00297 nb_samples);
00298 vector_fmac_scalar((float *)out_buf->extended_data[0],
00299 (float *) in_buf->extended_data[0],
00300 s->input_scale[i], nb_samples * s->nb_channels);
00301 }
00302 }
00303 avfilter_unref_buffer(in_buf);
00304
00305 out_buf->pts = s->next_pts;
00306 if (s->next_pts != AV_NOPTS_VALUE)
00307 s->next_pts += nb_samples;
00308
00309 ff_filter_samples(outlink, out_buf);
00310
00311 return 0;
00312 }
00313
00318 static int get_available_samples(MixContext *s)
00319 {
00320 int i;
00321 int available_samples = INT_MAX;
00322
00323 av_assert0(s->nb_inputs > 1);
00324
00325 for (i = 1; i < s->nb_inputs; i++) {
00326 int nb_samples;
00327 if (s->input_state[i] == INPUT_OFF)
00328 continue;
00329 nb_samples = av_audio_fifo_size(s->fifos[i]);
00330 available_samples = FFMIN(available_samples, nb_samples);
00331 }
00332 if (available_samples == INT_MAX)
00333 return 0;
00334 return available_samples;
00335 }
00336
00340 static int request_samples(AVFilterContext *ctx, int min_samples)
00341 {
00342 MixContext *s = ctx->priv;
00343 int i, ret;
00344
00345 av_assert0(s->nb_inputs > 1);
00346
00347 for (i = 1; i < s->nb_inputs; i++) {
00348 ret = 0;
00349 if (s->input_state[i] == INPUT_OFF)
00350 continue;
00351 while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
00352 ret = avfilter_request_frame(ctx->inputs[i]);
00353 if (ret == AVERROR_EOF) {
00354 if (av_audio_fifo_size(s->fifos[i]) == 0) {
00355 s->input_state[i] = INPUT_OFF;
00356 continue;
00357 }
00358 } else if (ret)
00359 return ret;
00360 }
00361 return 0;
00362 }
00363
00370 static int calc_active_inputs(MixContext *s)
00371 {
00372 int i;
00373 int active_inputs = 0;
00374 for (i = 0; i < s->nb_inputs; i++)
00375 active_inputs += !!(s->input_state[i] != INPUT_OFF);
00376 s->active_inputs = active_inputs;
00377
00378 if (!active_inputs ||
00379 (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
00380 (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
00381 return AVERROR_EOF;
00382 return 0;
00383 }
00384
00385 static int request_frame(AVFilterLink *outlink)
00386 {
00387 AVFilterContext *ctx = outlink->src;
00388 MixContext *s = ctx->priv;
00389 int ret;
00390 int wanted_samples, available_samples;
00391
00392 if (s->input_state[0] == INPUT_OFF) {
00393 ret = request_samples(ctx, 1);
00394 if (ret < 0)
00395 return ret;
00396
00397 ret = calc_active_inputs(s);
00398 if (ret < 0)
00399 return ret;
00400
00401 available_samples = get_available_samples(s);
00402 if (!available_samples)
00403 return 0;
00404
00405 return output_frame(outlink, available_samples);
00406 }
00407
00408 if (s->frame_list->nb_frames == 0) {
00409 ret = avfilter_request_frame(ctx->inputs[0]);
00410 if (ret == AVERROR_EOF) {
00411 s->input_state[0] = INPUT_OFF;
00412 if (s->nb_inputs == 1)
00413 return AVERROR_EOF;
00414 else
00415 return AVERROR(EAGAIN);
00416 } else if (ret)
00417 return ret;
00418 }
00419 av_assert0(s->frame_list->nb_frames > 0);
00420
00421 wanted_samples = frame_list_next_frame_size(s->frame_list);
00422 ret = request_samples(ctx, wanted_samples);
00423 if (ret < 0)
00424 return ret;
00425
00426 ret = calc_active_inputs(s);
00427 if (ret < 0)
00428 return ret;
00429
00430 if (s->active_inputs > 1) {
00431 available_samples = get_available_samples(s);
00432 if (!available_samples)
00433 return 0;
00434 available_samples = FFMIN(available_samples, wanted_samples);
00435 } else {
00436 available_samples = wanted_samples;
00437 }
00438
00439 s->next_pts = frame_list_next_pts(s->frame_list);
00440 frame_list_remove_samples(s->frame_list, available_samples);
00441
00442 return output_frame(outlink, available_samples);
00443 }
00444
00445 static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *buf)
00446 {
00447 AVFilterContext *ctx = inlink->dst;
00448 MixContext *s = ctx->priv;
00449 AVFilterLink *outlink = ctx->outputs[0];
00450 int i;
00451
00452 for (i = 0; i < ctx->input_count; i++)
00453 if (ctx->inputs[i] == inlink)
00454 break;
00455 if (i >= ctx->input_count) {
00456 av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
00457 return;
00458 }
00459
00460 if (i == 0) {
00461 int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
00462 outlink->time_base);
00463 frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
00464 }
00465
00466 av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
00467 buf->audio->nb_samples);
00468
00469 avfilter_unref_buffer(buf);
00470 }
00471
00472 static int init(AVFilterContext *ctx, const char *args, void *opaque)
00473 {
00474 MixContext *s = ctx->priv;
00475 int i, ret;
00476
00477 s->class = &amix_class;
00478 av_opt_set_defaults(s);
00479
00480 if ((ret = av_set_options_string(s, args, "=", ":")) < 0) {
00481 av_log(ctx, AV_LOG_ERROR, "Error parsing options string '%s'.\n", args);
00482 return ret;
00483 }
00484 av_opt_free(s);
00485
00486 for (i = 0; i < s->nb_inputs; i++) {
00487 char name[32];
00488 AVFilterPad pad = { 0 };
00489
00490 snprintf(name, sizeof(name), "input%d", i);
00491 pad.type = AVMEDIA_TYPE_AUDIO;
00492 pad.name = av_strdup(name);
00493 pad.filter_samples = filter_samples;
00494
00495 avfilter_insert_inpad(ctx, i, &pad);
00496 }
00497
00498 return 0;
00499 }
00500
00501 static void uninit(AVFilterContext *ctx)
00502 {
00503 int i;
00504 MixContext *s = ctx->priv;
00505
00506 if (s->fifos) {
00507 for (i = 0; i < s->nb_inputs; i++)
00508 av_audio_fifo_free(s->fifos[i]);
00509 av_freep(&s->fifos);
00510 }
00511 frame_list_clear(s->frame_list);
00512 av_freep(&s->frame_list);
00513 av_freep(&s->input_state);
00514 av_freep(&s->input_scale);
00515
00516 for (i = 0; i < ctx->input_count; i++)
00517 av_freep(&ctx->input_pads[i].name);
00518 }
00519
00520 static int query_formats(AVFilterContext *ctx)
00521 {
00522 AVFilterFormats *formats = NULL;
00523 avfilter_add_format(&formats, AV_SAMPLE_FMT_FLT);
00524 avfilter_set_common_formats(ctx, formats);
00525 ff_set_common_channel_layouts(ctx, ff_all_channel_layouts());
00526 ff_set_common_samplerates(ctx, ff_all_samplerates());
00527 return 0;
00528 }
00529
00530 AVFilter avfilter_af_amix = {
00531 .name = "amix",
00532 .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
00533 .priv_size = sizeof(MixContext),
00534
00535 .init = init,
00536 .uninit = uninit,
00537 .query_formats = query_formats,
00538
00539 .inputs = (const AVFilterPad[]) {{ .name = NULL}},
00540 .outputs = (const AVFilterPad[]) {{ .name = "default",
00541 .type = AVMEDIA_TYPE_AUDIO,
00542 .config_props = config_output,
00543 .request_frame = request_frame },
00544 { .name = NULL}},
00545 };