FFmpeg
silenceremove_template.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
/*
 * Per-sample-format configuration for this template.
 *
 * DEPTH is not defined in this file (presumably the including file sets it):
 * DEPTH == 32 selects single precision and the "flt" suffix, any other value
 * selects double precision and the "dbl" suffix. Every helper macro is
 * #undef'd first so that a previous definition does not clash.
 */
#undef ftype
#undef FABS
#undef FMAX
#undef SAMPLE_FORMAT
#undef SQRT
#undef ZERO
#undef ONE
#undef TMIN
#if DEPTH == 32
#define SAMPLE_FORMAT flt
#define SQRT sqrtf
#define FMAX fmaxf
#define FABS fabsf
#define ftype float
#define ZERO 0.f
#define ONE 1.f
/* Parenthesized so the unary minus cannot merge with a neighbouring operator. */
#define TMIN (-FLT_MAX)
#else
#define SAMPLE_FORMAT dbl
#define SQRT sqrt
#define FMAX fmax
#define FABS fabs
#define ftype double
#define ZERO 0.0
#define ONE 1.0
/* Parenthesized so the unary minus cannot merge with a neighbouring operator. */
#define TMIN (-DBL_MAX)
#endif

/*
 * fn(name) expands to name_<SAMPLE_FORMAT>, e.g. fn(flush) -> flush_flt.
 * The extra fn2 level forces SAMPLE_FORMAT to be macro-expanded before the
 * ## paste in fn3 takes place.
 */
#define fn3(a,b)   a##_##b
#define fn2(a,b)   fn3(a,b)
#define fn(a)      fn2(a, SAMPLE_FORMAT)
50 
51 static void fn(flush)(ftype *dst, const ftype *src, int src_pos,
52  int nb_channels, int count, int src_nb_samples,
53  int *out_nb_samples)
54 {
55  int oidx, out_count = count;
56  int sidx = src_pos;
57 
58  if (count <= 0)
59  return;
60 
61  oidx = *out_nb_samples + out_count - 1;
62  *out_nb_samples += out_count;
63  while (out_count-- > 0) {
64  const int spos = sidx * nb_channels;
65  const int opos = oidx * nb_channels;
66 
67  for (int ch = 0; ch < nb_channels; ch++)
68  dst[opos + ch] = src[spos + ch];
69 
70  oidx--;
71  sidx--;
72  if (sidx < 0)
73  sidx = src_nb_samples - 1;
74  }
75 }
76 
78  const ftype *src,
79  ftype *queue,
80  int *queue_pos,
81  int *queue_size,
82  int *window_pos,
83  int *window_size,
84  const int nb_channels,
85  const int nb_samples,
86  const int window_nb_samples)
87 {
88  const int pos = *queue_pos * nb_channels;
89 
90  for (int ch = 0; ch < nb_channels; ch++)
91  queue[pos + ch] = src[ch];
92 
93  (*queue_pos)++;
94  if (*queue_pos >= nb_samples)
95  *queue_pos = 0;
96 
97  if (*queue_size < nb_samples)
98  (*queue_size)++;
99 
100  if (*window_size < window_nb_samples)
101  (*window_size)++;
102 
103  (*window_pos)++;
104  if (*window_pos >= window_nb_samples)
105  *window_pos = 0;
106 }
107 
108 static ftype fn(compute_avg)(ftype *cache, ftype x, ftype px,
109  int window_size, int *unused, int *unused2)
110 {
111  ftype r;
112 
113  cache[0] += FABS(x);
114  cache[0] -= FABS(px);
115  cache[0] = r = FMAX(cache[0], ZERO);
116 
117  return r / window_size;
118 }
119 
/*
 * Maintain a circular deque of candidate samples in ss[0..n-1].
 *
 * The macro expects these variables in the invoking scope: ss (array),
 * n (its length), front and back (indices into ss) and empty (flag).
 * Live entries run from back up to front, wrapping around the array;
 * cleared slots are set to empty_value.
 *
 *  1. If psample equals ss[front], that entry is cleared and, unless it was
 *     the only slot in use, front steps down towards back.
 *  2. If `sample op ss[front]` holds, every entry from front down to back
 *     is cleared and the deque becomes empty.
 *  3. Otherwise entries are cleared from the back, stepping back upwards,
 *     for as long as `sample op ss[back]` holds.
 *  4. If the deque is not empty, back steps down one slot (wrapping).
 *
 * The macro does not store `sample`; the invoking code writes it to
 * ss[back] afterwards. It expands to bare statements, so it is invoked
 * without a trailing semicolon.
 *
 * NOTE(review): step 1 sets `empty` whenever front == back after the pop,
 * which is also the case when one live entry still remains at back; that
 * entry is then overwritten by the caller. Confirm this is intended.
 */
#define PEAKS(empty_value,op,sample, psample)\
    if (!empty && psample == ss[front]) {    \
        ss[front] = empty_value;             \
        if (back != front) {                 \
            front--;                         \
            if (front < 0)                   \
                front = n - 1;               \
        }                                    \
        empty = front == back;               \
    }                                        \
                                             \
    if (!empty && sample op ss[front]) {     \
        while (1) {                          \
            ss[front] = empty_value;         \
            if (back == front) {             \
                empty = 1;                   \
                break;                       \
            }                                \
            front--;                         \
            if (front < 0)                   \
                front = n - 1;               \
        }                                    \
    }                                        \
                                             \
    while (!empty && sample op ss[back]) {   \
        ss[back] = empty_value;              \
        if (back == front) {                 \
            empty = 1;                       \
            break;                           \
        }                                    \
        back++;                              \
        if (back >= n)                       \
            back = 0;                        \
    }                                        \
                                             \
    if (!empty) {                            \
        back--;                              \
        if (back < 0)                        \
            back = n - 1;                    \
    }
160 
162  int n, int *ffront, int *bback)
163 {
164  ftype r, ax = FABS(x);
165  int front = *ffront;
166  int back = *bback;
167  int empty = front == back && ss[front] == -ONE;
168  int idx;
169 
170  PEAKS(-ONE, >, ax, FABS(px))
171 
172  ss[back] = ax;
173  idx = (back <= front) ? back + (front - back + 1) / 2 : back + (n + front - back + 1) / 2;
174  if (idx >= n)
175  idx -= n;
176  av_assert2(idx >= 0 && idx < n);
177  r = ss[idx];
178 
179  *ffront = front;
180  *bback = back;
181 
182  return r;
183 }
184 
186  int n, int *ffront, int *bback)
187 {
188  ftype r, ax = FABS(x);
189  int front = *ffront;
190  int back = *bback;
191  int empty = front == back && ss[front] == ZERO;
192 
193  PEAKS(ZERO, >=, ax, FABS(px))
194 
195  ss[back] = ax;
196  r = ss[front];
197 
198  *ffront = front;
199  *bback = back;
200 
201  return r;
202 }
203 
205  int n, int *ffront, int *bback)
206 {
207  int front = *ffront;
208  int back = *bback;
209  int empty = front == back && ss[front] == TMIN;
210  ftype r, max, min;
211 
212  PEAKS(TMIN, >=, x, px)
213 
214  ss[back] = x;
215  max = ss[front];
216  min = x;
217  r = FABS(min) + FABS(max - min);
218 
219  *ffront = front;
220  *bback = back;
221 
222  return r;
223 }
224 
225 static ftype fn(compute_rms)(ftype *cache, ftype x, ftype px,
226  int window_size, int *unused, int *unused2)
227 {
228  ftype r;
229 
230  cache[0] += x * x;
231  cache[0] -= px * px;
232  cache[0] = r = FMAX(cache[0], ZERO);
233 
234  return SQRT(r / window_size);
235 }
236 
238  int n, int *unused, int *unused2)
239 {
240  ftype r;
241 
242  ss[0] += x;
243  ss[0] -= px;
244 
245  ss[1] += x * x;
246  ss[1] -= px * px;
247  ss[1] = FMAX(ss[1], ZERO);
248 
249  r = FMAX(ss[1] - ss[0] * ss[0] / n, ZERO) / n;
250 
251  return SQRT(r);
252 }
253 
255  const ftype *src, ftype *dst,
256  int *nb_out_samples,
257  const int nb_channels)
258 {
259  SilenceRemoveContext *s = ctx->priv;
260  const int start_periods = s->start_periods;
261  int out_nb_samples = *nb_out_samples;
262  const int start_window_nb_samples = s->start_window->nb_samples;
263  const int start_nb_samples = s->start_queuef->nb_samples;
264  const int start_wpos = s->start_window_pos * nb_channels;
265  const int start_pos = s->start_queue_pos * nb_channels;
266  ftype *startw = (ftype *)s->start_window->data[0];
267  ftype *start = (ftype *)s->start_queuef->data[0];
268  const ftype start_threshold = s->start_threshold;
269  const int start_mode = s->start_mode;
270  int start_thres = (start_mode == T_ANY) ? 0 : 1;
271  const int start_duration = s->start_duration;
272  ftype *start_cache = (ftype *)s->start_cache;
273  const int start_silence = s->start_silence;
274  int window_size = start_window_nb_samples;
275  const int cache_size = s->cache_size;
276  int *front = s->start_front;
277  int *back = s->start_back;
278 
279  fn(queue_sample)(ctx, src, start,
280  &s->start_queue_pos,
281  &s->start_queue_size,
282  &s->start_window_pos,
283  &s->start_window_size,
284  nb_channels,
285  start_nb_samples,
286  start_window_nb_samples);
287 
288  if (s->start_found_periods < 0)
289  goto skip;
290 
291  if (s->detection != D_PEAK && s->detection != D_MEDIAN &&
292  s->detection != D_PTP)
293  window_size = s->start_window_size;
294 
295  for (int ch = 0; ch < nb_channels; ch++) {
296  ftype start_sample = start[start_pos + ch];
297  ftype start_ow = startw[start_wpos + ch];
298  ftype tstart;
299 
300  tstart = fn(s->compute)(start_cache + ch * cache_size,
301  start_sample,
302  start_ow,
303  window_size,
304  front + ch,
305  back + ch);
306 
307  startw[start_wpos + ch] = start_sample;
308 
309  if (start_mode == T_ANY) {
310  start_thres |= tstart > start_threshold;
311  } else {
312  start_thres &= tstart > start_threshold;
313  }
314  }
315 
316  if (s->start_found_periods >= 0) {
317  if (start_silence > 0) {
318  s->start_silence_count++;
319  if (s->start_silence_count > start_silence)
320  s->start_silence_count = start_silence;
321  }
322 
323  s->start_sample_count += start_thres;
324  }
325 
326  if (s->start_sample_count > start_duration) {
327  s->start_found_periods++;
328  if (s->start_found_periods >= start_periods) {
329  if (!ctx->is_disabled)
330  fn(flush)(dst, start, s->start_queue_pos, nb_channels,
331  s->start_silence_count, start_nb_samples,
332  &out_nb_samples);
333  s->start_silence_count = 0;
334  s->start_found_periods = -1;
335  }
336 
337  s->start_sample_count = 0;
338  }
339 
340 skip:
341  if (s->start_found_periods < 0 || ctx->is_disabled) {
342  const int dst_pos = out_nb_samples * nb_channels;
343  for (int ch = 0; ch < nb_channels; ch++)
344  dst[dst_pos + ch] = start[start_pos + ch];
345  out_nb_samples++;
346  }
347 
348  *nb_out_samples = out_nb_samples;
349 }
350 
352  const ftype *src, ftype *dst,
353  int *nb_out_samples,
354  const int nb_channels)
355 {
356  SilenceRemoveContext *s = ctx->priv;
357  const int stop_periods = s->stop_periods;
358  int out_nb_samples = *nb_out_samples;
359  const int stop_window_nb_samples = s->stop_window->nb_samples;
360  const int stop_nb_samples = s->stop_queuef->nb_samples;
361  const int stop_wpos = s->stop_window_pos * nb_channels;
362  const int stop_pos = s->stop_queue_pos * nb_channels;
363  ftype *stopw = (ftype *)s->stop_window->data[0];
364  const ftype stop_threshold = s->stop_threshold;
365  ftype *stop = (ftype *)s->stop_queuef->data[0];
366  const int stop_mode = s->stop_mode;
367  int stop_thres = (stop_mode == T_ANY) ? 0 : 1;
368  const int stop_duration = s->stop_duration;
369  ftype *stop_cache = (ftype *)s->stop_cache;
370  const int stop_silence = s->stop_silence;
371  int window_size = stop_window_nb_samples;
372  const int cache_size = s->cache_size;
373  const int restart = s->restart;
374  int *front = s->stop_front;
375  int *back = s->stop_back;
376 
377  fn(queue_sample)(ctx, src, stop,
378  &s->stop_queue_pos,
379  &s->stop_queue_size,
380  &s->stop_window_pos,
381  &s->stop_window_size,
382  nb_channels,
383  stop_nb_samples,
384  stop_window_nb_samples);
385 
386  if (s->detection != D_PEAK && s->detection != D_MEDIAN &&
387  s->detection != D_PTP)
388  window_size = s->stop_window_size;
389 
390  for (int ch = 0; ch < nb_channels; ch++) {
391  ftype stop_sample = stop[stop_pos + ch];
392  ftype stop_ow = stopw[stop_wpos + ch];
393  ftype tstop;
394 
395  tstop = fn(s->compute)(stop_cache + ch * cache_size,
396  stop_sample,
397  stop_ow,
398  window_size,
399  front + ch,
400  back + ch);
401 
402  stopw[stop_wpos + ch] = stop_sample;
403 
404  if (stop_mode == T_ANY) {
405  stop_thres |= tstop <= stop_threshold;
406  } else {
407  stop_thres &= tstop <= stop_threshold;
408  }
409  }
410 
411  s->found_nonsilence = FFMAX(s->found_nonsilence, !stop_thres);
412  if (restart && !stop_thres)
413  s->stop_found_periods = 0;
414 
415  if (s->stop_found_periods >= 0 || ctx->is_disabled) {
416  if (s->found_nonsilence) {
417  s->stop_sample_count += stop_thres;
418  s->stop_sample_count *= stop_thres;
419  }
420  } else if (s->stop_silence_count > 0) {
421  const int dst_pos = out_nb_samples * nb_channels;
422  for (int ch = 0; ch < nb_channels; ch++)
423  dst[dst_pos + ch] = stop[stop_pos + ch];
424  s->stop_silence_count--;
425  out_nb_samples++;
426  }
427 
428  if (s->stop_sample_count > stop_duration) {
429  s->stop_found_periods++;
430  if (s->stop_found_periods >= stop_periods) {
431  s->stop_found_periods = -1;
432  s->stop_silence_count = stop_silence;
433  }
434 
435  s->stop_sample_count = 0;
436  }
437 
438  if (s->stop_found_periods >= 0 || ctx->is_disabled) {
439  const int dst_pos = out_nb_samples * nb_channels;
440  for (int ch = 0; ch < nb_channels; ch++)
441  dst[dst_pos + ch] = stop[stop_pos + ch];
442  out_nb_samples++;
443  }
444 
445  *nb_out_samples = out_nb_samples;
446 }
r
const char * r
Definition: vf_curves.c:126
compute_rms
static ftype fn() compute_rms(ftype *cache, ftype x, ftype px, int window_size, int *unused, int *unused2)
Definition: silenceremove_template.c:225
SQRT
#define SQRT
Definition: silenceremove_template.c:38
compute_ptp
static ftype fn() compute_ptp(ftype *ss, ftype x, ftype px, int n, int *ffront, int *bback)
Definition: silenceremove_template.c:204
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
TMIN
#define TMIN
Definition: silenceremove_template.c:44
ss
#define ss(width, name, subs,...)
Definition: cbs_vp9.c:202
FMAX
#define FMAX
Definition: silenceremove_template.c:39
compute_peak
static ftype fn() compute_peak(ftype *ss, ftype x, ftype px, int n, int *ffront, int *bback)
Definition: silenceremove_template.c:185
compute_median
static ftype fn() compute_median(ftype *ss, ftype x, ftype px, int n, int *ffront, int *bback)
Definition: silenceremove_template.c:161
PEAKS
#define PEAKS(empty_value, op, sample, psample)
Definition: silenceremove_template.c:120
s
#define s(width, name)
Definition: cbs_vp9.c:198
compute_avg
static ftype fn() compute_avg(ftype *cache, ftype x, ftype px, int window_size, int *unused, int *unused2)
Definition: silenceremove_template.c:108
compute_dev
static ftype fn() compute_dev(ftype *ss, ftype x, ftype px, int n, int *unused, int *unused2)
Definition: silenceremove_template.c:237
ZERO
#define ZERO
Definition: silenceremove_template.c:42
ctx
AVFormatContext * ctx
Definition: movenc.c:48
SilenceRemoveContext
Definition: af_silenceremove.c:54
FABS
#define FABS
Definition: silenceremove_template.c:40
ftype
#define ftype
Definition: silenceremove_template.c:41
av_assert2
#define av_assert2(cond)
assert() equivalent intended for use in speed-critical code.
Definition: avassert.h:67
ONE
#define ONE
Definition: silenceremove_template.c:43
filter_stop
static void fn() filter_stop(AVFilterContext *ctx, const ftype *src, ftype *dst, int *nb_out_samples, const int nb_channels)
Definition: silenceremove_template.c:351
pos
unsigned int pos
Definition: spdifenc.c:413
D_PEAK
@ D_PEAK
Definition: af_silenceremove.c:36
flush
static void fn() flush(ftype *dst, const ftype *src, int src_pos, int nb_channels, int count, int src_nb_samples, int *out_nb_samples)
Definition: silenceremove_template.c:51
fn
#define fn(a)
Definition: silenceremove_template.c:49
queue_sample
static void fn() queue_sample(AVFilterContext *ctx, const ftype *src, ftype *queue, int *queue_pos, int *queue_size, int *window_pos, int *window_size, const int nb_channels, const int nb_samples, const int window_nb_samples)
Definition: silenceremove_template.c:77
AVFilterContext
An instance of a filter.
Definition: avfilter.h:409
D_MEDIAN
@ D_MEDIAN
Definition: af_silenceremove.c:37
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
filter_start
static void fn() filter_start(AVFilterContext *ctx, const ftype *src, ftype *dst, int *nb_out_samples, const int nb_channels)
Definition: silenceremove_template.c:254
D_PTP
@ D_PTP
Definition: af_silenceremove.c:38
skip
static void BS_FUNC() skip(BSCTX *bc, unsigned int n)
Skip n bits in the buffer.
Definition: bitstream_template.h:375
T_ANY
@ T_ANY
Definition: af_silenceremove.c:50
min
float min
Definition: vorbis_enc_data.h:429