FFmpeg
tx.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "cpu.h"
20 #include "qsort.h"
21 #include "bprint.h"
22 
23 #include "tx_priv.h"
24 
/* Evaluates to non-zero when x is the float, double or int32 variant of the
 * transform family named by `type` (the token is pasted onto AV_TX_<fmt>_). */
#define TYPE_IS(type, x)                 \
    ((AV_TX_FLOAT_  ## type == (x)) ||   \
     (AV_TX_DOUBLE_ ## type == (x)) ||   \
     (AV_TX_INT32_  ## type == (x)))
29 
30 /* Calculates the modular multiplicative inverse */
31 static av_always_inline int mulinv(int n, int m)
32 {
33  n = n % m;
34  for (int x = 1; x < m; x++)
35  if (((n * x) % m) == 1)
36  return x;
37  av_assert0(0); /* Never reached */
38  return 0;
39 }
40 
41 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
43 {
44  int *in_map, *out_map;
45  const int inv = s->inv;
46  const int len = n*m; /* Will not be equal to s->len for MDCTs */
47  const int mdct = TYPE_IS(MDCT, s->type);
48  int m_inv, n_inv;
49 
50  /* Make sure the numbers are coprime */
51  if (av_gcd(n, m) != 1)
52  return AVERROR(EINVAL);
53 
54  m_inv = mulinv(m, n);
55  n_inv = mulinv(n, m);
56 
57  if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
58  return AVERROR(ENOMEM);
59 
60  in_map = s->map;
61  out_map = s->map + len;
62 
63  /* Ruritanian map for input, CRT map for output, can be swapped */
64  for (int j = 0; j < m; j++) {
65  for (int i = 0; i < n; i++) {
66  /* Shifted by 1 to simplify MDCTs */
67  in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
68  out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
69  }
70  }
71 
72  /* Change transform direction by reversing all ACs */
73  if (inv) {
74  for (int i = 0; i < m; i++) {
75  int *in = &in_map[i*n + 1]; /* Skip the DC */
76  for (int j = 0; j < ((n - 1) >> 1); j++)
77  FFSWAP(int, in[j], in[n - j - 2]);
78  }
79  }
80 
81  /* Our 15-point transform is also a compound one, so embed its input map */
82  if (n == 15) {
83  for (int k = 0; k < m; k++) {
84  int tmp[15];
85  memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
86  for (int i = 0; i < 5; i++) {
87  for (int j = 0; j < 3; j++)
88  in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
89  }
90  }
91  }
92 
93  return 0;
94 }
95 
/**
 * Recursively computes the split-radix permutation value of index i for a
 * transform of length len. The result may be negative; callers in this file
 * negate it and mask it with (len - 1).
 *
 * @param i   index to permute
 * @param len current (sub)transform length, halved on every level
 * @param inv direction flag, XORed into the sign selection of the odd part
 */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter;

    /* Bottom of the recursion: only the parity of the index is left */
    if (half <= 1)
        return i & 1;

    /* Index belongs to the half-length part */
    if ((i & half) == 0)
        return 2 * split_radix_permutation(i, half, inv);

    /* Index belongs to one of the two quarter-length parts */
    quarter = half >> 1;
    return 4 * split_radix_permutation(i, quarter, inv) + 1 -
           2 * (!(i & quarter) ^ inv);
}
106 
107 int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
108 {
109  int len = s->len;
110 
111  if (!(s->map = av_malloc(len*sizeof(*s->map))))
112  return AVERROR(ENOMEM);
113 
114  if (invert_lookup) {
115  for (int i = 0; i < s->len; i++)
116  s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
117  } else {
118  for (int i = 0; i < s->len; i++)
119  s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
120  }
121 
122  return 0;
123 }
124 
126 {
127  int *src_map, out_map_idx = 0, len = s->len;
128 
129  if (!s->sub || !s->sub->map)
130  return AVERROR(EINVAL);
131 
132  if (!(s->map = av_mallocz(len*sizeof(*s->map))))
133  return AVERROR(ENOMEM);
134 
135  src_map = s->sub->map;
136 
137  /* The first coefficient is always already in-place */
138  for (int src = 1; src < s->len; src++) {
139  int dst = src_map[src];
140  int found = 0;
141 
142  if (dst <= src)
143  continue;
144 
145  /* This just checks if a closed loop has been encountered before,
146  * and if so, skips it, since to fully permute a loop we must only
147  * enter it once. */
148  do {
149  for (int j = 0; j < out_map_idx; j++) {
150  if (dst == s->map[j]) {
151  found = 1;
152  break;
153  }
154  }
155  dst = src_map[dst];
156  } while (dst != src && !found);
157 
158  if (!found)
159  s->map[out_map_idx++] = src;
160  }
161 
162  s->map[out_map_idx++] = 0;
163 
164  return 0;
165 }
166 
/**
 * Recursive worker that fills revtab for a length-n split-radix transform.
 *
 * Each call halves len. While the halved length is still above basis, it
 * recurses into one half-length and two quarter-length parts (the latter two
 * flagged as a dual pair, low and high). Once at or below basis, it writes
 * the entries for the even and odd input indices of this part, optionally
 * interleaving dual pairs in runs of dual_stride.
 *
 * @param inv_lookup when non-zero, revtab[position] = permuted index;
 *                   otherwise revtab[permuted index] = position
 */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    const int half = len >> 1;

    if (half > basis) {
        /* Still too large: split into a half and two quarters */
        const int quarter = half >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, half, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Dual handling only applies when a dual stride was requested */
    const int dual = is_dual && dual_stride;
    const int high = dual & dual_high;
    const int step = dual ? FFMIN(dual_stride, half) : 0;

    int even_pos = offset + high*(step - 2*half);
    int odd_pos  = even_pos + half + (dual && !high)*half + high*half;

    for (int i = 0; i < half; i++) {
        const int k_even = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
        const int k_odd  = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_pos++] = k_even;
            revtab[odd_pos++]  = k_odd;
        } else {
            revtab[k_even] = even_pos++;
            revtab[k_odd]  = odd_pos++;
        }

        /* After every full run of `step` entries, skip the partner's run */
        if (step && !((i + 1) % step)) {
            even_pos += step;
            odd_pos  += step;
        }
    }
}
209 
211  int basis, int dual_stride)
212 {
213  int len = s->len;
214  int inv = s->inv;
215 
216  if (!(s->map = av_mallocz(len*sizeof(*s->map))))
217  return AVERROR(ENOMEM);
218 
219  basis >>= 1;
220  if (len < basis)
221  return AVERROR(EINVAL);
222 
223  av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
224  av_assert0(dual_stride <= basis);
225  parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
226  basis, dual_stride, invert_lookup);
227 
228  return 0;
229 }
230 
231 static void reset_ctx(AVTXContext *s)
232 {
233  if (!s)
234  return;
235 
236  if (s->sub)
237  for (int i = 0; i < s->nb_sub; i++)
238  reset_ctx(&s->sub[i]);
239 
240  if (s->cd_self->uninit)
241  s->cd_self->uninit(s);
242 
243  av_freep(&s->sub);
244  av_freep(&s->map);
245  av_freep(&s->exp);
246  av_freep(&s->tmp);
247 
248  memset(s, 0, sizeof(*s));
249 }
250 
252 {
253  if (!(*ctx))
254  return;
255 
256  reset_ctx(*ctx);
257  av_freep(ctx);
258 }
259 
261  uint64_t flags, FFTXCodeletOptions *opts,
262  int len, int inv, const void *scale)
263 {
264  /* Can only handle one sample+type to one sample+type transforms */
265  if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
266  return AVERROR(EINVAL);
267  return 0;
268 }
269 
270 /* Null transform when the length is 1 */
271 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
272 {
273  memcpy(_out, _in, stride);
274 }
275 
276 static const FFTXCodelet ff_tx_null_def = {
277  .name = NULL_IF_CONFIG_SMALL("null"),
278  .function = ff_tx_null,
279  .type = TX_TYPE_ANY,
280  .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
282  .factors[0] = TX_FACTOR_ANY,
283  .min_len = 1,
284  .max_len = 1,
285  .init = ff_tx_null_init,
286  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
287  .prio = FF_TX_PRIO_MAX,
288 };
289 
290 static const FFTXCodelet * const ff_tx_null_list[] = {
292  NULL,
293 };
294 
295 #if !CONFIG_SMALL
296 static void print_flags(AVBPrint *bp, uint64_t f)
297 {
298  int prev = 0;
299  const char *sep = ", ";
300  av_bprintf(bp, "flags: [");
301  if ((f & FF_TX_ALIGNED) && ++prev)
302  av_bprintf(bp, "aligned");
303  if ((f & AV_TX_UNALIGNED) && ++prev)
304  av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
305  if ((f & AV_TX_INPLACE) && ++prev)
306  av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
307  if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
308  av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
309  if ((f & FF_TX_FORWARD_ONLY) && ++prev)
310  av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
311  if ((f & FF_TX_INVERSE_ONLY) && ++prev)
312  av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
313  if ((f & FF_TX_PRESHUFFLE) && ++prev)
314  av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
315  if ((f & AV_TX_FULL_IMDCT) && ++prev)
316  av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
317  av_bprintf(bp, "]");
318 }
319 
320 static void print_type(AVBPrint *bp, enum AVTXType type)
321 {
322  av_bprintf(bp, "%s",
323  type == TX_TYPE_ANY ? "any" :
324  type == AV_TX_FLOAT_FFT ? "fft_float" :
325  type == AV_TX_FLOAT_MDCT ? "mdct_float" :
326  type == AV_TX_FLOAT_RDFT ? "rdft_float" :
327  type == AV_TX_DOUBLE_FFT ? "fft_double" :
328  type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
329  type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
330  type == AV_TX_INT32_FFT ? "fft_int32" :
331  type == AV_TX_INT32_MDCT ? "mdct_int32" :
332  type == AV_TX_INT32_RDFT ? "rdft_int32" :
333  "unknown");
334 }
335 
336 static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
337 {
338  AVBPrint bp = { 0 };
340 
341  av_bprintf(&bp, "%s - type: ", cd->name);
342 
343  print_type(&bp, cd->type);
344 
345  av_bprintf(&bp, ", len: ");
346  if (cd->min_len != cd->max_len)
347  av_bprintf(&bp, "[%i, ", cd->min_len);
348 
349  if (cd->max_len == TX_LEN_UNLIMITED)
350  av_bprintf(&bp, "∞");
351  else
352  av_bprintf(&bp, "%i", cd->max_len);
353 
354  av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : "");
355  for (int i = 0; i < TX_MAX_SUB; i++) {
356  if (i && cd->factors[i])
357  av_bprintf(&bp, ", ");
358  if (cd->factors[i] == TX_FACTOR_ANY)
359  av_bprintf(&bp, "any");
360  else if (cd->factors[i])
361  av_bprintf(&bp, "%i", cd->factors[i]);
362  else
363  break;
364  }
365 
366  av_bprintf(&bp, "], ");
367  print_flags(&bp, cd->flags);
368 
369  if (print_prio)
370  av_bprintf(&bp, ", prio: %i", prio);
371 
372  av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
373 }
374 
375 static void print_tx_structure(AVTXContext *s, int depth)
376 {
377  const FFTXCodelet *cd = s->cd_self;
378 
379  for (int i = 0; i <= depth; i++)
380  av_log(NULL, AV_LOG_VERBOSE, " ");
381 
382  print_cd_info(cd, cd->prio, 0);
383 
384  for (int i = 0; i < s->nb_sub; i++)
385  print_tx_structure(&s->sub[i], depth + 1);
386 }
387 #endif /* CONFIG_SMALL */
388 
389 typedef struct TXCodeletMatch {
390  const FFTXCodelet *cd;
391  int prio;
393 
395 {
396  return FFDIFFSIGN(b->prio, a->prio);
397 }
398 
399 /* We want all factors to completely cover the length */
400 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
401 {
402  int all_flag = 0;
403 
404  for (int i = 0; i < TX_MAX_SUB; i++) {
405  int factor = cd->factors[i];
406 
407  /* Conditions satisfied */
408  if (len == 1)
409  return 1;
410 
411  /* No more factors */
412  if (!factor) {
413  break;
414  } else if (factor == TX_FACTOR_ANY) {
415  all_flag = 1;
416  continue;
417  }
418 
419  if (factor == 2) { /* Fast path */
420  int bits_2 = ff_ctz(len);
421  if (!bits_2)
422  return 0; /* Factor not supported */
423 
424  len >>= bits_2;
425  } else {
426  int res = len % factor;
427  if (res)
428  return 0; /* Factor not supported */
429 
430  while (!res) {
431  len /= factor;
432  res = len % factor;
433  }
434  }
435  }
436 
437  return all_flag || (len == 1);
438 }
439 
441  uint64_t flags, FFTXCodeletOptions *opts,
442  int len, int inv, const void *scale)
443 {
444  int ret = 0;
445  AVTXContext *sub = NULL;
446  TXCodeletMatch *cd_tmp, *cd_matches = NULL;
447  unsigned int cd_matches_size = 0;
448  int nb_cd_matches = 0;
449 #if !CONFIG_SMALL
450  AVBPrint bp = { 0 };
451 #endif
452 
453  /* Array of all compiled codelet lists. Order is irrelevant. */
454  const FFTXCodelet * const * const codelet_list[] = {
459 #if HAVE_X86ASM
461 #endif
462  };
463  int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
464 
465  /* We still accept functions marked with SLOW, even if the CPU is
466  * marked with the same flag, but we give them lower priority. */
467  const int cpu_flags = av_get_cpu_flags();
468  const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
471 
472  static const int slow_penalties[][2] = {
473  { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
474  { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
475  { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
476  { AV_CPU_FLAG_ATOM, 1 + 128 },
477  { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
478  { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
479  };
480 
481  /* Flags the transform wants */
482  uint64_t req_flags = flags;
483 
484  /* Flags the codelet may require to be present */
485  uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE;
486 
487  /* Unaligned codelets are compatible with the aligned flag */
488  if (req_flags & FF_TX_ALIGNED)
489  req_flags |= AV_TX_UNALIGNED;
490 
491  /* If either flag is set, both are okay, so don't check for an exact match */
492  if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
493  req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
494  if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
495  req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
496 
497  /* Loop through all codelets in all codelet lists to find matches
498  * to the requirements */
499  while (codelet_list_num--) {
500  const FFTXCodelet * const * list = codelet_list[codelet_list_num];
501  const FFTXCodelet *cd = NULL;
502 
503  while ((cd = *list++)) {
504  int max_factor = 0;
505 
506  /* Check if the type matches */
507  if (cd->type != TX_TYPE_ANY && type != cd->type)
508  continue;
509 
510  /* Check direction for non-orthogonal codelets */
511  if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
512  ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
513  continue;
514 
515  /* Check if the requested flags match from both sides */
516  if (((req_flags & cd->flags) != (req_flags)) ||
517  ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
518  continue;
519 
520  /* Check if length is supported */
521  if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
522  continue;
523 
524  /* Check if the CPU supports the required ISA */
525  if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
526  !(cpu_flags & (cd->cpu_flags & ~slow_mask)))
527  continue;
528 
529  /* Check for factors */
530  if (!check_cd_factors(cd, len))
531  continue;
532 
533  /* Realloc array and append */
534  cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
535  sizeof(*cd_tmp) * (nb_cd_matches + 1));
536  if (!cd_tmp) {
537  av_free(cd_matches);
538  return AVERROR(ENOMEM);
539  }
540 
541  cd_matches = cd_tmp;
542  cd_matches[nb_cd_matches].cd = cd;
543  cd_matches[nb_cd_matches].prio = cd->prio;
544 
545  /* If the CPU has a SLOW flag, and the instruction is also flagged
546  * as being slow for such, reduce its priority */
547  for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
548  if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
549  cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
550  }
551 
552  /* Prioritize aligned-only codelets */
553  if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
554  cd_matches[nb_cd_matches].prio += 64;
555 
556  /* Codelets for specific lengths are generally faster */
557  if ((len == cd->min_len) && (len == cd->max_len))
558  cd_matches[nb_cd_matches].prio += 64;
559 
560  /* Forward-only or inverse-only transforms are generally better */
562  cd_matches[nb_cd_matches].prio += 64;
563 
564  /* Larger factors are generally better */
565  for (int i = 0; i < TX_MAX_SUB; i++)
566  max_factor = FFMAX(cd->factors[i], max_factor);
567  if (max_factor)
568  cd_matches[nb_cd_matches].prio += 16*max_factor;
569 
570  nb_cd_matches++;
571  }
572  }
573 
574 #if !CONFIG_SMALL
575  /* Print debugging info */
577  av_bprintf(&bp, "For transform of length %i, %s, ", len,
578  inv ? "inverse" : "forward");
579  print_type(&bp, type);
580  av_bprintf(&bp, ", ");
581  print_flags(&bp, flags);
582  av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
583  nb_cd_matches ? ":" : ".");
584 #endif
585 
586  /* No matches found */
587  if (!nb_cd_matches)
588  return AVERROR(ENOSYS);
589 
590  /* Sort the list */
591  AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
592 
593 #if !CONFIG_SMALL
594  av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
595 
596  for (int i = 0; i < nb_cd_matches; i++) {
597  av_log(NULL, AV_LOG_VERBOSE, " %i: ", i + 1);
598  print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1);
599  }
600 #endif
601 
602  if (!s->sub) {
603  s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
604  if (!sub) {
605  ret = AVERROR(ENOMEM);
606  goto end;
607  }
608  }
609 
610  /* Attempt to initialize each */
611  for (int i = 0; i < nb_cd_matches; i++) {
612  const FFTXCodelet *cd = cd_matches[i].cd;
613  AVTXContext *sctx = &s->sub[s->nb_sub];
614 
615  sctx->len = len;
616  sctx->inv = inv;
617  sctx->type = type;
618  sctx->flags = flags;
619  sctx->cd_self = cd;
620 
621  s->fn[s->nb_sub] = cd->function;
622  s->cd[s->nb_sub] = cd;
623 
624  ret = 0;
625  if (cd->init)
626  ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
627 
628  if (ret >= 0) {
629  s->nb_sub++;
630  goto end;
631  }
632 
633  s->fn[s->nb_sub] = NULL;
634  s->cd[s->nb_sub] = NULL;
635 
636  reset_ctx(sctx);
637  if (ret == AVERROR(ENOMEM))
638  break;
639  }
640 
641  if (!s->nb_sub)
642  av_freep(&s->sub);
643 
644 end:
645  av_free(cd_matches);
646  return ret;
647 }
648 
650  int inv, int len, const void *scale, uint64_t flags)
651 {
652  int ret;
653  AVTXContext tmp = { 0 };
654  const double default_scale_d = 1.0;
655  const float default_scale_f = 1.0f;
656 
657  if (!len || type >= AV_TX_NB || !ctx || !tx)
658  return AVERROR(EINVAL);
659 
660  if (!(flags & AV_TX_UNALIGNED))
661  flags |= FF_TX_ALIGNED;
662  if (!(flags & AV_TX_INPLACE))
664 
665  if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT)))
666  scale = &default_scale_f;
667  else if (!scale && (type == AV_TX_DOUBLE_MDCT))
668  scale = &default_scale_d;
669 
670  ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
671  if (ret < 0)
672  return ret;
673 
674  *ctx = &tmp.sub[0];
675  *tx = tmp.fn[0];
676 
677 #if !CONFIG_SMALL
678  av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n");
679  print_tx_structure(*ctx, 0);
680 #endif
681 
682  return ret;
683 }
AV_TX_DOUBLE_MDCT
@ AV_TX_DOUBLE_MDCT
Definition: tx.h:69
AV_CPU_FLAG_SSSE3SLOW
#define AV_CPU_FLAG_SSSE3SLOW
SSSE3 supported, but usually not faster.
Definition: cpu.h:42
ff_tx_gen_ptwo_inplace_revtab_idx
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
Definition: tx.c:125
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_ctz
#define ff_ctz
Definition: intmath.h:106
TXCodeletMatch
Definition: tx.c:389
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
sub
static float sub(float src0, float src1)
Definition: dnn_backend_native_layer_mathbinary.c:31
AV_CPU_FLAG_SSE3SLOW
#define AV_CPU_FLAG_SSE3SLOW
SSE3 supported, but usually not faster.
Definition: cpu.h:39
AVTXContext
Definition: tx_priv.h:201
ff_tx_null_def
static const FFTXCodelet ff_tx_null_def
Definition: tx.c:276
basis
static int16_t basis[64][64]
Definition: mpegvideo_enc.c:4097
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
FFTXCodeletOptions
Definition: tx_priv.h:160
ff_tx_gen_compound_mapping
int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
Definition: tx.c:42
b
#define b
Definition: input.c:34
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
AV_TX_DOUBLE_FFT
@ AV_TX_DOUBLE_FFT
Definition: tx.h:48
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:101
FF_TX_CPU_FLAGS_ALL
#define FF_TX_CPU_FLAGS_ALL
Definition: tx_priv.h:196
print_type
static void print_type(AVBPrint *bp, enum AVTXType type)
Definition: tx.c:320
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
cmp_matches
static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
Definition: tx.c:394
print_flags
static void print_flags(AVBPrint *bp, uint64_t f)
Definition: tx.c:296
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:649
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
av_gcd
int64_t av_gcd(int64_t a, int64_t b)
Compute the greatest common divisor of two integer operands.
Definition: mathematics.c:37
ff_tx_null_init
static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx.c:260
AV_BPRINT_SIZE_AUTOMATIC
#define AV_BPRINT_SIZE_AUTOMATIC
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1389
TX_MAX_SUB
#define TX_MAX_SUB
Definition: tx_priv.h:166
FFTXCodelet::min_len
int min_len
Definition: tx_priv.h:180
FF_TX_ALIGNED
#define FF_TX_ALIGNED
Definition: tx_priv.h:144
FFTXCodelet::type
enum AVTXType type
Definition: tx_priv.h:171
FFDIFFSIGN
#define FFDIFFSIGN(x, y)
Comparator.
Definition: macros.h:45
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:58
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
AV_TX_NB
@ AV_TX_NB
Definition: tx.h:93
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:111
AV_TX_FLOAT_MDCT
@ AV_TX_FLOAT_MDCT
Standard MDCT with a sample data type of float, double or int32_t, respectively.
Definition: tx.h:68
av_fast_realloc
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given buffer if it is not large enough, otherwise do nothing.
Definition: mem.c:505
s
#define s(width, name)
Definition: cbs_vp9.c:256
print_cd_info
static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
Definition: tx.c:336
FFTXCodelet::flags
uint64_t flags
Definition: tx_priv.h:174
FFTXCodelet::prio
int prio
Definition: tx_priv.h:198
AV_TX_INT32_MDCT
@ AV_TX_INT32_MDCT
Definition: tx.h:70
AVTXContext::type
enum AVTXType type
Definition: tx_priv.h:222
ff_tx_gen_split_radix_parity_revtab
int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int invert_lookup, int basis, int dual_stride)
Definition: tx.c:210
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
FF_TX_FORWARD_ONLY
#define FF_TX_FORWARD_ONLY
Definition: tx_priv.h:147
AVTXContext::len
int len
Definition: tx_priv.h:204
AV_TX_FLOAT_FFT
@ AV_TX_FLOAT_FFT
Standard complex to complex FFT with sample data type of AVComplexFloat, AVComplexDouble or AVComplex...
Definition: tx.h:47
split_radix_permutation
static int split_radix_permutation(int i, int len, int inv)
Definition: tx.c:96
ctx
AVFormatContext * ctx
Definition: movenc.c:48
FFTXCodelet::cpu_flags
int cpu_flags
Definition: tx_priv.h:193
TX_TYPE_ANY
#define TX_TYPE_ANY
Definition: tx_priv.h:172
AV_TX_FULL_IMDCT
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
Definition: tx.h:136
reset_ctx
static void reset_ctx(AVTXContext *s)
Definition: tx.c:231
opts
AVDictionary * opts
Definition: movenc.c:50
TXCodeletMatch::cd
const FFTXCodelet * cd
Definition: tx.c:390
NULL
#define NULL
Definition: coverity.c:32
AV_TX_INPLACE
@ AV_TX_INPLACE
Performs an in-place transformation on the input.
Definition: tx.h:122
ff_tx_null
static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx.c:271
FF_TX_OUT_OF_PLACE
#define FF_TX_OUT_OF_PLACE
Definition: tx_priv.h:143
list
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining list
Definition: filter_design.txt:25
FF_TX_PRIO_MAX
@ FF_TX_PRIO_MAX
Definition: tx_priv.h:156
FFTXCodelet::init
int(* init)(AVTXContext *s, const struct FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_priv.h:184
AV_TX_UNALIGNED
@ AV_TX_UNALIGNED
Relaxes alignment requirement for the in and out arrays of av_tx_fn().
Definition: tx.h:128
TXCodeletMatch::prio
int prio
Definition: tx.c:391
qsort.h
FF_TX_PRESHUFFLE
#define FF_TX_PRESHUFFLE
Definition: tx_priv.h:145
f
f
Definition: af_crystalizer.c:122
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
AV_CPU_FLAG_AVXSLOW
#define AV_CPU_FLAG_AVXSLOW
AVX supported, but slow when using YMM registers (e.g. Bulldozer)
Definition: cpu.h:48
cpu.h
AVTXType
AVTXType
Definition: tx.h:39
AV_CPU_FLAG_SSE2SLOW
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster.
Definition: cpu.h:35
ff_tx_codelet_list_double_c
const FFTXCodelet *const ff_tx_codelet_list_double_c[]
AV_TX_INT32_FFT
@ AV_TX_INT32_FFT
Definition: tx.h:49
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
AVTXContext::inv
int inv
Definition: tx_priv.h:205
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:251
FFTXCodelet::max_len
int max_len
Definition: tx_priv.h:181
bprint.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
AV_QSORT
#define AV_QSORT(p, num, type, cmp)
Quicksort This sort is fast, and fully inplace but not stable and it is possible to construct input t...
Definition: qsort.h:33
ff_tx_gen_ptwo_revtab
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
Definition: tx.c:107
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_TX_DOUBLE_RDFT
@ AV_TX_DOUBLE_RDFT
Definition: tx.h:89
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:264
len
int len
Definition: vorbis_enc_data.h:426
TX_LEN_UNLIMITED
#define TX_LEN_UNLIMITED
Definition: tx_priv.h:182
stride
#define stride
Definition: h264pred_template.c:537
AV_CPU_FLAG_ATOM
#define AV_CPU_FLAG_ATOM
Atom processor, some SSSE3 instructions are slower.
Definition: cpu.h:43
tx_priv.h
AVTXContext::flags
uint64_t flags
Definition: tx_priv.h:223
ret
ret
Definition: filter_design.txt:187
ff_tx_init_subtx
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx.c:440
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
TX_FACTOR_ANY
#define TX_FACTOR_ANY
Definition: tx_priv.h:178
FF_TX_INVERSE_ONLY
#define FF_TX_INVERSE_ONLY
Definition: tx_priv.h:146
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:94
ff_tx_codelet_list_float_c
const FFTXCodelet *const ff_tx_codelet_list_float_c[]
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:88
FFTXCodelet
Definition: tx_priv.h:168
check_cd_factors
static int check_cd_factors(const FFTXCodelet *cd, int len)
Definition: tx.c:400
AV_TX_INT32_RDFT
@ AV_TX_INT32_RDFT
Definition: tx.h:90
AVTXContext::cd_self
const FFTXCodelet * cd_self
Definition: tx_priv.h:221
FFTXCodelet::name
const char * name
Definition: tx_priv.h:169
factor
static const int factor[16]
Definition: vf_pp7.c:76
ff_tx_codelet_list_float_x86
const FFTXCodelet *const ff_tx_codelet_list_float_x86[]
Definition: tx_float_init.c:66
print_tx_structure
static void print_tx_structure(AVTXContext *s, int depth)
Definition: tx.c:375
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_tx_null_list
static const FFTXCodelet *const ff_tx_null_list[]
Definition: tx.c:290
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
mulinv
static av_always_inline int mulinv(int n, int m)
Definition: tx.c:31
ff_tx_codelet_list_int32_c
const FFTXCodelet *const ff_tx_codelet_list_int32_c[]
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
FFTXCodelet::factors
int factors[TX_MAX_SUB]
Definition: tx_priv.h:177
TYPE_IS
#define TYPE_IS(type, x)
Definition: tx.c:25
FFTXCodelet::function
av_tx_fn function
Definition: tx_priv.h:170
parity_revtab_generator
static void parity_revtab_generator(int *revtab, int n, int inv, int offset, int is_dual, int dual_high, int len, int basis, int dual_stride, int inv_lookup)
Definition: tx.c:167