FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
f_select.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Stefano Sabatini
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * filter for selecting which frame passes in the filterchain
24  */
25 
26 #include "libavutil/avstring.h"
27 #include "libavutil/eval.h"
28 #include "libavutil/fifo.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/opt.h"
31 #include "avfilter.h"
32 #include "audio.h"
33 #include "formats.h"
34 #include "internal.h"
35 #include "video.h"
36 
37 #if CONFIG_AVCODEC
38 #include "libavcodec/dsputil.h"
39 #endif
40 
/**
 * Names of the variables that may be referenced in the selection expression.
 * This table is handed to av_expr_parse() in init(); the array is
 * NULL-terminated.
 * NOTE(review): the order presumably has to match enum var_name, whose
 * enumerators are not visible in this listing -- confirm before reordering.
 */
41 static const char *const var_names[] = {
42  "TB", ///< timebase of the input link
43 
44  "pts", ///< PTS of the current frame, expressed in TB units
45  "start_pts", ///< first PTS in the stream, expressed in TB units
46  "prev_pts", ///< previous frame PTS
47  "prev_selected_pts", ///< previous selected frame PTS
48 
49  "t", ///< PTS of the current frame, expressed in seconds
50  "start_t", ///< first PTS in the stream, expressed in seconds
51  "prev_t", ///< previous frame time
52  "prev_selected_t", ///< previously selected time
53 
54  "pict_type", ///< the type of picture in the movie
55  "I",
56  "P",
57  "B",
58  "S",
59  "SI",
60  "SP",
61  "BI",
62  "PICT_TYPE_I",
63  "PICT_TYPE_P",
64  "PICT_TYPE_B",
65  "PICT_TYPE_S",
66  "PICT_TYPE_SI",
67  "PICT_TYPE_SP",
68  "PICT_TYPE_BI",
69 
70  "interlace_type", ///< the frame interlace type
71  "PROGRESSIVE",
72  "TOPFIRST",
73  "BOTTOMFIRST",
74 
75  "consumed_samples_n",///< number of samples consumed by the filter (only audio)
76  "samples_n", ///< number of samples in the current frame (only audio)
77  "sample_rate", ///< sample rate (only audio)
78 
79  "n", ///< frame number (starting from zero)
80  "selected_n", ///< selected frame number (starting from zero)
81  "prev_selected_n", ///< number of the last selected frame
82 
83  "key", ///< tell if the frame is a key frame
84  "pos", ///< original position in the file of the frame
85 
86  "scene", ///< scene change score computed by get_scene_score() (only video)
87 
88  NULL
89 };
90 
/**
 * Indexes into SelectContext.var_values.
 * NOTE(review): the enumerators are missing from this listing. The rest of
 * the file uses VAR_N, VAR_TB, VAR_PTS, VAR_SCENE, ... so there is presumably
 * one VAR_* enumerator per var_names[] entry, in the same order -- confirm
 * against the upstream source.
 */
91 enum var_name {
93 
98 
103 
119 
124 
128 
132 
135 
137 
139 };
140 
/**
 * Private context shared by the select and aselect filters.
 * NOTE(review): some members used elsewhere in the file (expr, var_values,
 * nb_outputs) are not visible in this listing.
 */
141 typedef struct SelectContext {
142  const AVClass *class;
143  char *expr_str; ///< selection expression string, set through the "expr"/"e" options
146  int do_scene_detect; ///< 1 if the expression requires scene detection variables, 0 otherwise
147 #if CONFIG_AVCODEC
148  AVCodecContext *avctx; ///< codec context required for the DSPContext (scene detect only)
149  DSPContext c; ///< context providing optimized SAD methods (scene detect only)
150  double prev_mafd; ///< previous MAFD (scene detect only)
151 #endif
152  AVFrame *prev_picref; ///< previous frame (scene detect only)
153  double select; ///< result of the last expression evaluation (set in select_frame())
154  int select_out; ///< mark the selected output pad index
156 } SelectContext;
157 
/* Byte offset of member x inside SelectContext, used by the option table. */
158 #define OFFSET(x) offsetof(SelectContext, x)
/*
 * Define the AVOption table <filt_name>_options with the given option flags.
 * "expr" and "e" both store into expr_str (default "1"); "outputs" and "n"
 * both store into nb_outputs (default 1, range 1..INT_MAX).
 */
159 #define DEFINE_OPTIONS(filt_name, FLAGS) \
160 static const AVOption filt_name##_options[] = { \
161  { "expr", "set an expression to use for selecting frames", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS }, \
162  { "e", "set an expression to use for selecting frames", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS }, \
163  { "outputs", "set the number of outputs", OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
164  { "n", "set the number of outputs", OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
165  { NULL } \
166 }
167 
168 static int request_frame(AVFilterLink *outlink);
169 
/**
 * Common initialisation for select and aselect: parse the selection
 * expression, decide whether scene detection is needed, and create one
 * output pad per requested output.
 *
 * @param ctx filter context whose priv data is a SelectContext
 * @return 0 on success, the av_expr_parse() error code if the expression is
 *         invalid, or AVERROR(ENOMEM) if a pad name cannot be allocated
 */
170 static av_cold int init(AVFilterContext *ctx)
171 {
172  SelectContext *select = ctx->priv;
173  int i, ret;
174 
175  if ((ret = av_expr_parse(&select->expr, select->expr_str,
176  var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) {
177  av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n",
178  select->expr_str);
179  return ret;
180  }
 /* plain substring search: any occurrence of "scene" in the expression
  * text turns scene detection on */
181  select->do_scene_detect = !!strstr(select->expr_str, "scene");
182 
183  for (i = 0; i < select->nb_outputs; i++) {
184  AVFilterPad pad = { 0 };
185 
 /* the name is heap-allocated; uninit() frees it through output_pads[] */
186  pad.name = av_asprintf("output%d", i);
187  if (!pad.name)
188  return AVERROR(ENOMEM);
 /* output pads carry the same media type as the filter's single input */
189  pad.type = ctx->filter->inputs[0].type;
 /* NOTE(review): a line is missing from this listing here (presumably the
  * pad's request_frame callback is set) -- confirm against upstream.
  * NOTE(review): the result of ff_insert_outpad() is ignored; if it can
  * fail, pad.name would leak and the pad would silently be missing. */
191  ff_insert_outpad(ctx, i, &pad);
192  }
193 
194  return 0;
195 }
196 
/* Numeric codes for the interlace type; INTERLACE_TYPE_B is compared against
 * the interlace_type variable in select_frame().
 * NOTE(review): presumably P/T/B stand for progressive / top field first /
 * bottom field first, matching the PROGRESSIVE/TOPFIRST/BOTTOMFIRST names in
 * var_names[] -- the assignments are not visible in this listing. */
197 #define INTERLACE_TYPE_P 0
198 #define INTERLACE_TYPE_T 1
199 #define INTERLACE_TYPE_B 2
200 
/**
 * Input link configuration: reset the expression variables to their initial
 * values and, when scene detection is enabled, set up the SAD helpers.
 * NOTE(review): several assignments are missing from this listing (the
 * embedded line numbers jump), so the list of initialised variables shown
 * here is incomplete.
 *
 * @param inlink input link being configured
 * @return 0 on success, AVERROR(ENOMEM) if the codec context cannot be
 *         allocated
 */
201 static int config_input(AVFilterLink *inlink)
202 {
203  SelectContext *select = inlink->dst->priv;
204 
 /* counters start from zero */
205  select->var_values[VAR_N] = 0.0;
206  select->var_values[VAR_SELECTED_N] = 0.0;
207 
208  select->var_values[VAR_TB] = av_q2d(inlink->time_base);
209 
 /* NAN marks "not known yet"; select_frame() fills the start values in
  * when it finds them still NAN */
210  select->var_values[VAR_PREV_PTS] = NAN;
213  select->var_values[VAR_PREV_T] = NAN;
214  select->var_values[VAR_START_PTS] = NAN;
215  select->var_values[VAR_START_T] = NAN;
216 
229 
233 
234  select->var_values[VAR_PICT_TYPE] = NAN;
235  select->var_values[VAR_INTERLACE_TYPE] = NAN;
236  select->var_values[VAR_SCENE] = NAN;
238  select->var_values[VAR_SAMPLES_N] = NAN;
239 
 /* the sample rate is only defined for audio links */
240  select->var_values[VAR_SAMPLE_RATE] =
241  inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
242 
243 #if CONFIG_AVCODEC
244  if (select->do_scene_detect) {
 /* the codec context exists only to initialise the DSPContext that
  * provides the SAD functions used by get_scene_score() */
245  select->avctx = avcodec_alloc_context3(NULL);
246  if (!select->avctx)
247  return AVERROR(ENOMEM);
248  avpriv_dsputil_init(&select->c, select->avctx);
249  }
250 #endif
251  return 0;
252 }
253 
254 #if CONFIG_AVCODEC
255 static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
256 {
257  double ret = 0;
258  SelectContext *select = ctx->priv;
259  AVFrame *prev_picref = select->prev_picref;
260 
261  if (prev_picref &&
262  frame->height == prev_picref->height &&
263  frame->width == prev_picref->width &&
264  frame->linesize[0] == prev_picref->linesize[0]) {
265  int x, y, nb_sad = 0;
266  int64_t sad = 0;
267  double mafd, diff;
268  uint8_t *p1 = frame->data[0];
269  uint8_t *p2 = prev_picref->data[0];
270  const int linesize = frame->linesize[0];
271 
272  for (y = 0; y < frame->height - 8; y += 8) {
273  for (x = 0; x < frame->width*3 - 8; x += 8) {
274  sad += select->c.sad[1](NULL, p1 + x, p2 + x,
275  linesize, 8);
276  nb_sad += 8 * 8;
277  }
278  p1 += 8 * linesize;
279  p2 += 8 * linesize;
280  }
281  emms_c();
282  mafd = nb_sad ? (double)sad / nb_sad : 0;
283  diff = fabs(mafd - select->prev_mafd);
284  ret = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
285  select->prev_mafd = mafd;
286  av_frame_free(&prev_picref);
287  }
288  select->prev_picref = av_frame_clone(frame);
289  return ret;
290 }
291 #endif
292 
/* Convert between the double representation used for expression variables
 * and int64_t timestamps: NAN on the double side corresponds to
 * AV_NOPTS_VALUE on the timestamp side. Note that D2TS evaluates its
 * argument twice. */
293 #define D2TS(d) (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d))
294 #define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))
295 
/**
 * Evaluate the selection expression for one frame.
 *
 * Fills the per-frame expression variables, evaluates the expression, stores
 * the result in select->select and the chosen output index in
 * select->select_out, then updates the "previous"/"selected" bookkeeping
 * variables. The frame itself is not forwarded or freed here.
 * NOTE(review): several lines are missing from this listing (the embedded
 * line numbers jump), so some statements below are visibly truncated.
 *
 * @param ctx   filter context whose priv data is a SelectContext
 * @param frame frame to evaluate
 */
296 static void select_frame(AVFilterContext *ctx, AVFrame *frame)
297 {
298  SelectContext *select = ctx->priv;
299  AVFilterLink *inlink = ctx->inputs[0];
300  double res;
301 
 /* the start values are latched from the first frame for which they are
  * still NAN (they are reset to NAN in config_input()) */
302  if (isnan(select->var_values[VAR_START_PTS]))
303  select->var_values[VAR_START_PTS] = TS2D(frame->pts);
304  if (isnan(select->var_values[VAR_START_T]))
305  select->var_values[VAR_START_T] = TS2D(frame->pts) * av_q2d(inlink->time_base);
306 
 /* NOTE(review): VAR_KEY is logged below, but no visible line assigns it
  * from the frame -- confirm whether the "key" variable is ever set. */
307  select->var_values[VAR_N ] = inlink->frame_count;
308  select->var_values[VAR_PTS] = TS2D(frame->pts);
309  select->var_values[VAR_T ] = TS2D(frame->pts) * av_q2d(inlink->time_base);
310  select->var_values[VAR_POS] = av_frame_get_pkt_pos(frame) == -1 ? NAN : av_frame_get_pkt_pos(frame);
311 
312  switch (inlink->type) {
313  case AVMEDIA_TYPE_AUDIO:
314  select->var_values[VAR_SAMPLES_N] = frame->nb_samples;
315  break;
316 
317  case AVMEDIA_TYPE_VIDEO:
 /* NOTE(review): the right-hand side of this assignment is missing from
  * the listing */
318  select->var_values[VAR_INTERLACE_TYPE] =
321  select->var_values[VAR_PICT_TYPE] = frame->pict_type;
322 #if CONFIG_AVCODEC
323  if (select->do_scene_detect) {
324  char buf[32];
325  select->var_values[VAR_SCENE] = get_scene_score(ctx, frame);
326  // TODO: document metadata
 /* the score is also exported as frame metadata under "lavfi.scene_score" */
327  snprintf(buf, sizeof(buf), "%f", select->var_values[VAR_SCENE]);
328  av_dict_set(avpriv_frame_get_metadatap(frame), "lavfi.scene_score", buf, 0);
329  }
330 #endif
331  break;
332  }
333 
334  select->select = res = av_expr_eval(select->expr, select->var_values, NULL);
335  av_log(inlink->dst, AV_LOG_DEBUG,
336  "n:%f pts:%f t:%f key:%d",
337  select->var_values[VAR_N],
338  select->var_values[VAR_PTS],
339  select->var_values[VAR_T],
340  (int)select->var_values[VAR_KEY]);
341 
342  switch (inlink->type) {
343  case AVMEDIA_TYPE_VIDEO:
 /* NOTE(review): part of this argument list is missing from the listing */
344  av_log(inlink->dst, AV_LOG_DEBUG, " interlace_type:%c pict_type:%c scene:%f",
347  select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_B ? 'B' : '?',
349  select->var_values[VAR_SCENE]);
350  break;
351  case AVMEDIA_TYPE_AUDIO:
352  av_log(inlink->dst, AV_LOG_DEBUG, " samples_n:%d consumed_samples_n:%d",
353  (int)select->var_values[VAR_SAMPLES_N],
354  (int)select->var_values[VAR_CONSUMED_SAMPLES_N]);
355  break;
356  }
357 
 /* map the result to an output: 0 drops the frame, NAN or a negative value
  * selects output 0, and a positive value selects output ceil(res)-1,
  * capped at the last output */
358  if (res == 0) {
359  select->select_out = -1; /* drop */
360  } else if (isnan(res) || res < 0) {
361  select->select_out = 0; /* first output */
362  } else {
363  select->select_out = FFMIN(ceilf(res)-1, select->nb_outputs-1); /* other outputs */
364  }
365 
366  av_log(inlink->dst, AV_LOG_DEBUG, " -> select:%f select_out:%d\n", res, select->select_out);
367 
 /* any non-zero result (including NAN) counts as "selected" */
368  if (res) {
369  select->var_values[VAR_PREV_SELECTED_N] = select->var_values[VAR_N];
371  select->var_values[VAR_PREV_SELECTED_T] = select->var_values[VAR_T];
372  select->var_values[VAR_SELECTED_N] += 1.0;
373  if (inlink->type == AVMEDIA_TYPE_AUDIO)
374  select->var_values[VAR_CONSUMED_SAMPLES_N] += frame->nb_samples;
375  }
376 
 /* remember this frame's timestamps for the next evaluation */
377  select->var_values[VAR_PREV_PTS] = select->var_values[VAR_PTS];
378  select->var_values[VAR_PREV_T] = select->var_values[VAR_T];
379 }
380 
381 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
382 {
383  AVFilterContext *ctx = inlink->dst;
384  SelectContext *select = ctx->priv;
385 
386  select_frame(ctx, frame);
387  if (select->select)
388  return ff_filter_frame(ctx->outputs[select->select_out], frame);
389 
390  av_frame_free(&frame);
391  return 0;
392 }
393 
394 static int request_frame(AVFilterLink *outlink)
395 {
396  AVFilterContext *ctx = outlink->src;
397  SelectContext *select = ctx->priv;
398  AVFilterLink *inlink = outlink->src->inputs[0];
399  int out_no = FF_OUTLINK_IDX(outlink);
400 
401  do {
402  int ret = ff_request_frame(inlink);
403  if (ret < 0)
404  return ret;
405  } while (select->select_out != out_no);
406 
407  return 0;
408 }
409 
410 static av_cold void uninit(AVFilterContext *ctx)
411 {
412  SelectContext *select = ctx->priv;
413  int i;
414 
415  av_expr_free(select->expr);
416  select->expr = NULL;
417 
418  for (i = 0; i < ctx->nb_outputs; i++)
419  av_freep(&ctx->output_pads[i].name);
420 
421 #if CONFIG_AVCODEC
422  if (select->do_scene_detect) {
423  av_frame_free(&select->prev_picref);
424  if (select->avctx) {
425  avcodec_close(select->avctx);
426  av_freep(&select->avctx);
427  }
428  }
429 #endif
430 }
431 
433 {
 /* NOTE(review): the function header and the contents of pix_fmts are
  * missing from this listing; ff_vf_select registers this function as its
  * query_formats callback. */
434  SelectContext *select = ctx->priv;
435 
 /* without scene detection no format restriction is applied; with it the
  * accepted formats are limited to the pix_fmts list below */
436  if (!select->do_scene_detect) {
437  return ff_default_query_formats(ctx);
438  } else {
439  static const enum AVPixelFormat pix_fmts[] = {
442  };
444  }
445  return 0;
446 }
447 
448 #if CONFIG_ASELECT_FILTER
449 
451 AVFILTER_DEFINE_CLASS(aselect);
452 
453 static av_cold int aselect_init(AVFilterContext *ctx)
454 {
455  SelectContext *select = ctx->priv;
456  int ret;
457 
458  if ((ret = init(ctx)) < 0)
459  return ret;
460 
461  if (select->do_scene_detect) {
462  av_log(ctx, AV_LOG_ERROR, "Scene detection is ignored in aselect filter\n");
463  return AVERROR(EINVAL);
464  }
465 
466  return 0;
467 }
468 
/* Input pads of the aselect filter: a single audio input that shares
 * config_input() and filter_frame() with the video filter. */
469 static const AVFilterPad avfilter_af_aselect_inputs[] = {
470  {
471  .name = "default",
472  .type = AVMEDIA_TYPE_AUDIO,
473  .config_props = config_input,
474  .filter_frame = filter_frame,
475  },
476  { NULL }
477 };
478 
/* Definition of the "aselect" audio filter. No static outputs are listed
 * here; output pads are created in init().
 * NOTE(review): one initializer line is missing from this listing. */
479 AVFilter ff_af_aselect = {
480  .name = "aselect",
481  .description = NULL_IF_CONFIG_SMALL("Select audio frames to pass in output."),
482  .init = aselect_init,
483  .uninit = uninit,
484  .priv_size = sizeof(SelectContext),
485  .inputs = avfilter_af_aselect_inputs,
486  .priv_class = &aselect_class,
488 };
489 #endif /* CONFIG_ASELECT_FILTER */
490 
491 #if CONFIG_SELECT_FILTER
492 
494 AVFILTER_DEFINE_CLASS(select);
495 
496 static av_cold int select_init(AVFilterContext *ctx)
497 {
498  SelectContext *select = ctx->priv;
499  int ret;
500 
501  if ((ret = init(ctx)) < 0)
502  return ret;
503 
504  if (select->do_scene_detect && !CONFIG_AVCODEC) {
505  av_log(ctx, AV_LOG_ERROR, "Scene detection is not available without libavcodec.\n");
506  return AVERROR(EINVAL);
507  }
508 
509  return 0;
510 }
511 
/* Input pads of the select filter: a single video input that shares
 * config_input() and filter_frame() with the audio filter. */
512 static const AVFilterPad avfilter_vf_select_inputs[] = {
513  {
514  .name = "default",
515  .type = AVMEDIA_TYPE_VIDEO,
516  .config_props = config_input,
517  .filter_frame = filter_frame,
518  },
519  { NULL }
520 };
521 
/* Definition of the "select" video filter. No static outputs are listed
 * here; output pads are created in init().
 * NOTE(review): one initializer line is missing from this listing. */
522 AVFilter ff_vf_select = {
523  .name = "select",
524  .description = NULL_IF_CONFIG_SMALL("Select video frames to pass in output."),
525  .init = select_init,
526  .uninit = uninit,
527  .query_formats = query_formats,
528  .priv_size = sizeof(SelectContext),
529  .priv_class = &select_class,
530  .inputs = avfilter_vf_select_inputs,
532 };
533 #endif /* CONFIG_SELECT_FILTER */