FFmpeg
tx.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "avassert.h"
20 #include "cpu.h"
21 #include "qsort.h"
22 #include "bprint.h"
23 
24 #include "tx_priv.h"
25 
/* Nonzero when x equals the float, double or int32 flavour of the transform
 * kind named by `type` (the token is pasted onto AV_TX_FLOAT_, AV_TX_DOUBLE_
 * and AV_TX_INT32_). Note that x may be evaluated up to three times. */
#define TYPE_IS(type, x)                    \
    ((AV_TX_FLOAT_  ## type == (x)) ||      \
     (AV_TX_DOUBLE_ ## type == (x)) ||      \
     (AV_TX_INT32_  ## type == (x)))
30 
31 /* Calculates the modular multiplicative inverse */
32 static av_always_inline int mulinv(int n, int m)
33 {
34  n = n % m;
35  for (int x = 1; x < m; x++)
36  if (((n * x) % m) == 1)
37  return x;
38  av_assert0(0); /* Never reached */
39  return 0;
40 }
41 
42 /* Guaranteed to work for any n, m where gcd(n, m) == 1 */
44 {
45  int *in_map, *out_map;
46  const int inv = s->inv;
47  const int len = n*m; /* Will not be equal to s->len for MDCTs */
48  int m_inv, n_inv;
49 
50  /* Make sure the numbers are coprime */
51  if (av_gcd(n, m) != 1)
52  return AVERROR(EINVAL);
53 
54  m_inv = mulinv(m, n);
55  n_inv = mulinv(n, m);
56 
57  if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
58  return AVERROR(ENOMEM);
59 
60  in_map = s->map;
61  out_map = s->map + len;
62 
63  /* Ruritanian map for input, CRT map for output, can be swapped */
64  for (int j = 0; j < m; j++) {
65  for (int i = 0; i < n; i++) {
66  in_map[j*n + i] = (i*m + j*n) % len;
67  out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
68  }
69  }
70 
71  /* Change transform direction by reversing all ACs */
72  if (inv) {
73  for (int i = 0; i < m; i++) {
74  int *in = &in_map[i*n + 1]; /* Skip the DC */
75  for (int j = 0; j < ((n - 1) >> 1); j++)
76  FFSWAP(int, in[j], in[n - j - 2]);
77  }
78  }
79 
80  /* Our 15-point transform is also a compound one, so embed its input map */
81  if (n == 15) {
82  for (int k = 0; k < m; k++) {
83  int tmp[15];
84  memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
85  for (int i = 0; i < 5; i++) {
86  for (int j = 0; j < 3; j++)
87  in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
88  }
89  }
90  }
91 
92  return 0;
93 }
94 
/* Recursively computes the split-radix permutation term for index i of a
 * transform of length len; inv (0 or 1) flips the sign of the odd terms.
 * The result may be negative: callers in this file negate it and mask it
 * with (len - 1) to obtain a table index. */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter, flip;

    /* Base case: only the parity of i is left */
    if (half <= 1)
        return i & 1;

    /* Lower half: recurse on a transform of half the size */
    if ((i & half) == 0)
        return 2 * split_radix_permutation(i, half, inv);

    /* Upper half: recurse on a quarter-size transform; the +1/-1 offset is
     * picked by the quarter bit of i, XORed with the direction */
    quarter = half >> 1;
    flip    = ((i & quarter) == 0) ^ inv;

    return 4 * split_radix_permutation(i, quarter, inv) + 1 - 2 * flip;
}
105 
106 int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
107 {
108  int len = s->len;
109 
110  if (!(s->map = av_malloc(len*sizeof(*s->map))))
111  return AVERROR(ENOMEM);
112 
113  if (invert_lookup) {
114  for (int i = 0; i < s->len; i++)
115  s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
116  } else {
117  for (int i = 0; i < s->len; i++)
118  s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
119  }
120 
121  return 0;
122 }
123 
125 {
126  int *src_map, out_map_idx = 0, len = s->len;
127 
128  if (!s->sub || !s->sub->map)
129  return AVERROR(EINVAL);
130 
131  if (!(s->map = av_mallocz(len*sizeof(*s->map))))
132  return AVERROR(ENOMEM);
133 
134  src_map = s->sub->map;
135 
136  /* The first coefficient is always already in-place */
137  for (int src = 1; src < s->len; src++) {
138  int dst = src_map[src];
139  int found = 0;
140 
141  if (dst <= src)
142  continue;
143 
144  /* This just checks if a closed loop has been encountered before,
145  * and if so, skips it, since to fully permute a loop we must only
146  * enter it once. */
147  do {
148  for (int j = 0; j < out_map_idx; j++) {
149  if (dst == s->map[j]) {
150  found = 1;
151  break;
152  }
153  }
154  dst = src_map[dst];
155  } while (dst != src && !found);
156 
157  if (!found)
158  s->map[out_map_idx++] = src;
159  }
160 
161  s->map[out_map_idx++] = 0;
162 
163  return 0;
164 }
165 
/* Recursive worker that fills revtab for a length-n transform.
 *
 * Each call halves len. While the halved length is still above basis, the
 * range starting at offset is split into three sub-ranges (one of the
 * halved length, two of a quarter each) that are handled recursively.
 * Otherwise the range is a leaf: indices offset + 2*i go to an "even" run
 * and indices offset + 2*i + 1 to an "odd" run, optionally interleaved in
 * groups of `stride` entries when the leaf is part of a dual pair and
 * dual_stride is nonzero. inv_lookup selects whether revtab is indexed by
 * destination position or by permuted index. */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    const int half = len >> 1;
    int dual, high, stride, even_pos, odd_pos;

    if (half > basis) {
        const int quarter = half >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, half, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Leaf: dual handling only applies when a dual stride was requested */
    dual   = is_dual && dual_stride;
    high   = dual & dual_high;
    stride = dual ? FFMIN(dual_stride, half) : 0;

    even_pos = offset + high*(stride - 2*half);
    odd_pos  = even_pos + half;
    if (dual && !high)
        odd_pos += half;
    odd_pos += high*half;

    for (int i = 0; i < half; i++) {
        const int k_even = -split_radix_permutation(offset + 2*i,     n, inv) & (n - 1);
        const int k_odd  = -split_radix_permutation(offset + 2*i + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_pos] = k_even;
            even_pos++;
            revtab[odd_pos] = k_odd;
            odd_pos++;
        } else {
            revtab[k_even] = even_pos;
            even_pos++;
            revtab[k_odd] = odd_pos;
            odd_pos++;
        }

        /* After every full group of `stride` entries, hop over the group
         * belonging to the other half of the dual pair */
        if (stride && ((i + 1) % stride) == 0) {
            even_pos += stride;
            odd_pos  += stride;
        }
    }
}
208 
210  int inv_lookup, int basis, int dual_stride)
211 {
212  basis >>= 1;
213  if (len < basis)
214  return AVERROR(EINVAL);
215 
216  if (!(s->map = av_mallocz(len*sizeof(*s->map))))
217  return AVERROR(ENOMEM);
218 
219  av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
220  av_assert0(dual_stride <= basis);
221 
222  parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
223  basis, dual_stride, inv_lookup != 0);
224 
225  return 0;
226 }
227 
228 static void reset_ctx(AVTXContext *s)
229 {
230  if (!s)
231  return;
232 
233  if (s->sub)
234  for (int i = 0; i < s->nb_sub; i++)
235  reset_ctx(&s->sub[i]);
236 
237  if (s->cd_self->uninit)
238  s->cd_self->uninit(s);
239 
240  av_freep(&s->sub);
241  av_freep(&s->map);
242  av_freep(&s->exp);
243  av_freep(&s->tmp);
244 
245  memset(s, 0, sizeof(*s));
246 }
247 
249 {
250  if (!(*ctx))
251  return;
252 
253  reset_ctx(*ctx);
254  av_freep(ctx);
255 }
256 
258  uint64_t flags, FFTXCodeletOptions *opts,
259  int len, int inv, const void *scale)
260 {
261  /* Can only handle one sample+type to one sample+type transforms */
262  if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
263  return AVERROR(EINVAL);
264  return 0;
265 }
266 
267 /* Null transform when the length is 1 */
268 static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
269 {
270  memcpy(_out, _in, stride);
271 }
272 
273 static const FFTXCodelet ff_tx_null_def = {
274  .name = NULL_IF_CONFIG_SMALL("null"),
275  .function = ff_tx_null,
276  .type = TX_TYPE_ANY,
277  .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED |
279  .factors[0] = TX_FACTOR_ANY,
280  .min_len = 1,
281  .max_len = 1,
282  .init = ff_tx_null_init,
283  .cpu_flags = FF_TX_CPU_FLAGS_ALL,
284  .prio = FF_TX_PRIO_MAX,
285 };
286 
287 static const FFTXCodelet * const ff_tx_null_list[] = {
289  NULL,
290 };
291 
292 #if !CONFIG_SMALL
293 static void print_flags(AVBPrint *bp, uint64_t f)
294 {
295  int prev = 0;
296  const char *sep = ", ";
297  av_bprintf(bp, "flags: [");
298  if ((f & FF_TX_ALIGNED) && ++prev)
299  av_bprintf(bp, "aligned");
300  if ((f & AV_TX_UNALIGNED) && ++prev)
301  av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
302  if ((f & AV_TX_INPLACE) && ++prev)
303  av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
304  if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
305  av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
306  if ((f & FF_TX_FORWARD_ONLY) && ++prev)
307  av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
308  if ((f & FF_TX_INVERSE_ONLY) && ++prev)
309  av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
310  if ((f & FF_TX_PRESHUFFLE) && ++prev)
311  av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
312  if ((f & AV_TX_FULL_IMDCT) && ++prev)
313  av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
314  if ((f & FF_TX_ASM_CALL) && ++prev)
315  av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
316  av_bprintf(bp, "]");
317 }
318 
319 static void print_type(AVBPrint *bp, enum AVTXType type)
320 {
321  av_bprintf(bp, "%s",
322  type == TX_TYPE_ANY ? "any" :
323  type == AV_TX_FLOAT_FFT ? "fft_float" :
324  type == AV_TX_FLOAT_MDCT ? "mdct_float" :
325  type == AV_TX_FLOAT_RDFT ? "rdft_float" :
326  type == AV_TX_DOUBLE_FFT ? "fft_double" :
327  type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
328  type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
329  type == AV_TX_INT32_FFT ? "fft_int32" :
330  type == AV_TX_INT32_MDCT ? "mdct_int32" :
331  type == AV_TX_INT32_RDFT ? "rdft_int32" :
332  "unknown");
333 }
334 
335 static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
336 {
337  AVBPrint bp = { 0 };
339 
340  av_bprintf(&bp, "%s - type: ", cd->name);
341 
342  print_type(&bp, cd->type);
343 
344  av_bprintf(&bp, ", len: ");
345  if (cd->min_len != cd->max_len)
346  av_bprintf(&bp, "[%i, ", cd->min_len);
347 
348  if (cd->max_len == TX_LEN_UNLIMITED)
349  av_bprintf(&bp, "∞");
350  else
351  av_bprintf(&bp, "%i", cd->max_len);
352 
353  av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : "");
354  for (int i = 0; i < TX_MAX_SUB; i++) {
355  if (i && cd->factors[i])
356  av_bprintf(&bp, ", ");
357  if (cd->factors[i] == TX_FACTOR_ANY)
358  av_bprintf(&bp, "any");
359  else if (cd->factors[i])
360  av_bprintf(&bp, "%i", cd->factors[i]);
361  else
362  break;
363  }
364 
365  av_bprintf(&bp, "], ");
366  print_flags(&bp, cd->flags);
367 
368  if (print_prio)
369  av_bprintf(&bp, ", prio: %i", prio);
370 
371  av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
372 }
373 
374 static void print_tx_structure(AVTXContext *s, int depth)
375 {
376  const FFTXCodelet *cd = s->cd_self;
377 
378  for (int i = 0; i <= depth; i++)
379  av_log(NULL, AV_LOG_VERBOSE, " ");
380 
381  print_cd_info(cd, cd->prio, 0);
382 
383  for (int i = 0; i < s->nb_sub; i++)
384  print_tx_structure(&s->sub[i], depth + 1);
385 }
386 #endif /* CONFIG_SMALL */
387 
388 typedef struct TXCodeletMatch {
389  const FFTXCodelet *cd;
390  int prio;
392 
394 {
395  return FFDIFFSIGN(b->prio, a->prio);
396 }
397 
398 /* We want all factors to completely cover the length */
399 static inline int check_cd_factors(const FFTXCodelet *cd, int len)
400 {
401  int all_flag = 0;
402 
403  for (int i = 0; i < TX_MAX_SUB; i++) {
404  int factor = cd->factors[i];
405 
406  /* Conditions satisfied */
407  if (len == 1)
408  return 1;
409 
410  /* No more factors */
411  if (!factor) {
412  break;
413  } else if (factor == TX_FACTOR_ANY) {
414  all_flag = 1;
415  continue;
416  }
417 
418  if (factor == 2) { /* Fast path */
419  int bits_2 = ff_ctz(len);
420  if (!bits_2)
421  return 0; /* Factor not supported */
422 
423  len >>= bits_2;
424  } else {
425  int res = len % factor;
426  if (res)
427  return 0; /* Factor not supported */
428 
429  while (!res) {
430  len /= factor;
431  res = len % factor;
432  }
433  }
434  }
435 
436  return all_flag || (len == 1);
437 }
438 
440  uint64_t flags, FFTXCodeletOptions *opts,
441  int len, int inv, const void *scale)
442 {
443  int ret = 0;
444  AVTXContext *sub = NULL;
445  TXCodeletMatch *cd_tmp, *cd_matches = NULL;
446  unsigned int cd_matches_size = 0;
447  int nb_cd_matches = 0;
448 #if !CONFIG_SMALL
449  AVBPrint bp = { 0 };
450 #endif
451 
452  /* Array of all compiled codelet lists. Order is irrelevant. */
453  const FFTXCodelet * const * const codelet_list[] = {
458 #if HAVE_X86ASM
460 #endif
461 #if ARCH_AARCH64
463 #endif
464  };
465  int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
466 
467  /* We still accept functions marked with SLOW, even if the CPU is
468  * marked with the same flag, but we give them lower priority. */
469  const int cpu_flags = av_get_cpu_flags();
470  const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW |
473 
474  static const int slow_penalties[][2] = {
475  { AV_CPU_FLAG_SSE2SLOW, 1 + 64 },
476  { AV_CPU_FLAG_SSE3SLOW, 1 + 64 },
477  { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 },
478  { AV_CPU_FLAG_ATOM, 1 + 128 },
479  { AV_CPU_FLAG_AVXSLOW, 1 + 128 },
480  { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 },
481  };
482 
483  /* Flags the transform wants */
484  uint64_t req_flags = flags;
485 
486  /* Flags the codelet may require to be present */
487  uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL;
488 
489  /* Unaligned codelets are compatible with the aligned flag */
490  if (req_flags & FF_TX_ALIGNED)
491  req_flags |= AV_TX_UNALIGNED;
492 
493  /* If either flag is set, both are okay, so don't check for an exact match */
494  if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
495  req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
496  if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
497  req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
498 
499  /* Loop through all codelets in all codelet lists to find matches
500  * to the requirements */
501  while (codelet_list_num--) {
502  const FFTXCodelet * const * list = codelet_list[codelet_list_num];
503  const FFTXCodelet *cd = NULL;
504 
505  while ((cd = *list++)) {
506  int max_factor = 0;
507 
508  /* Check if the type matches */
509  if (cd->type != TX_TYPE_ANY && type != cd->type)
510  continue;
511 
512  /* Check direction for non-orthogonal codelets */
513  if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
514  ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
515  continue;
516 
517  /* Check if the requested flags match from both sides */
518  if (((req_flags & cd->flags) != (req_flags)) ||
519  ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
520  continue;
521 
522  /* Check if length is supported */
523  if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
524  continue;
525 
526  /* Check if the CPU supports the required ISA */
527  if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
528  !(cpu_flags & (cd->cpu_flags & ~slow_mask)))
529  continue;
530 
531  /* Check for factors */
532  if (!check_cd_factors(cd, len))
533  continue;
534 
535  /* Realloc array and append */
536  cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
537  sizeof(*cd_tmp) * (nb_cd_matches + 1));
538  if (!cd_tmp) {
539  av_free(cd_matches);
540  return AVERROR(ENOMEM);
541  }
542 
543  cd_matches = cd_tmp;
544  cd_matches[nb_cd_matches].cd = cd;
545  cd_matches[nb_cd_matches].prio = cd->prio;
546 
547  /* If the CPU has a SLOW flag, and the instruction is also flagged
548  * as being slow for such, reduce its priority */
549  for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
550  if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
551  cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
552  }
553 
554  /* Prioritize aligned-only codelets */
555  if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
556  cd_matches[nb_cd_matches].prio += 64;
557 
558  /* Codelets for specific lengths are generally faster */
559  if ((len == cd->min_len) && (len == cd->max_len))
560  cd_matches[nb_cd_matches].prio += 64;
561 
562  /* Forward-only or inverse-only transforms are generally better */
564  cd_matches[nb_cd_matches].prio += 64;
565 
566  /* Larger factors are generally better */
567  for (int i = 0; i < TX_MAX_SUB; i++)
568  max_factor = FFMAX(cd->factors[i], max_factor);
569  if (max_factor)
570  cd_matches[nb_cd_matches].prio += 16*max_factor;
571 
572  nb_cd_matches++;
573  }
574  }
575 
576 #if !CONFIG_SMALL
577  /* Print debugging info */
579  av_bprintf(&bp, "For transform of length %i, %s, ", len,
580  inv ? "inverse" : "forward");
581  print_type(&bp, type);
582  av_bprintf(&bp, ", ");
583  print_flags(&bp, flags);
584  av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
585  nb_cd_matches ? ":" : ".");
586 #endif
587 
588  /* No matches found */
589  if (!nb_cd_matches)
590  return AVERROR(ENOSYS);
591 
592  /* Sort the list */
593  AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
594 
595 #if !CONFIG_SMALL
596  av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
597 
598  for (int i = 0; i < nb_cd_matches; i++) {
599  av_log(NULL, AV_LOG_VERBOSE, " %i: ", i + 1);
600  print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1);
601  }
602 #endif
603 
604  if (!s->sub) {
605  s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
606  if (!sub) {
607  ret = AVERROR(ENOMEM);
608  goto end;
609  }
610  }
611 
612  /* Attempt to initialize each */
613  for (int i = 0; i < nb_cd_matches; i++) {
614  const FFTXCodelet *cd = cd_matches[i].cd;
615  AVTXContext *sctx = &s->sub[s->nb_sub];
616 
617  sctx->len = len;
618  sctx->inv = inv;
619  sctx->type = type;
620  sctx->flags = cd->flags | flags;
621  sctx->cd_self = cd;
622 
623  s->fn[s->nb_sub] = cd->function;
624  s->cd[s->nb_sub] = cd;
625 
626  ret = 0;
627  if (cd->init)
628  ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
629 
630  if (ret >= 0) {
631  s->nb_sub++;
632  goto end;
633  }
634 
635  s->fn[s->nb_sub] = NULL;
636  s->cd[s->nb_sub] = NULL;
637 
638  reset_ctx(sctx);
639  if (ret == AVERROR(ENOMEM))
640  break;
641  }
642 
643  if (!s->nb_sub)
644  av_freep(&s->sub);
645 
646 end:
647  av_free(cd_matches);
648  return ret;
649 }
650 
652  int inv, int len, const void *scale, uint64_t flags)
653 {
654  int ret;
655  AVTXContext tmp = { 0 };
656  const double default_scale_d = 1.0;
657  const float default_scale_f = 1.0f;
658 
659  if (!len || type >= AV_TX_NB || !ctx || !tx)
660  return AVERROR(EINVAL);
661 
662  if (!(flags & AV_TX_UNALIGNED))
663  flags |= FF_TX_ALIGNED;
664  if (!(flags & AV_TX_INPLACE))
666 
667  if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT)))
668  scale = &default_scale_f;
669  else if (!scale && (type == AV_TX_DOUBLE_MDCT))
670  scale = &default_scale_d;
671 
672  ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
673  if (ret < 0)
674  return ret;
675 
676  *ctx = &tmp.sub[0];
677  *tx = tmp.fn[0];
678 
679 #if !CONFIG_SMALL
680  av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n");
681  print_tx_structure(*ctx, 0);
682 #endif
683 
684  return ret;
685 }
AV_TX_DOUBLE_MDCT
@ AV_TX_DOUBLE_MDCT
Definition: tx.h:69
AV_CPU_FLAG_SSSE3SLOW
#define AV_CPU_FLAG_SSSE3SLOW
SSSE3 supported, but usually not faster.
Definition: cpu.h:42
ff_tx_gen_ptwo_inplace_revtab_idx
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
Definition: tx.c:124
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_ctz
#define ff_ctz
Definition: intmath.h:107
TXCodeletMatch
Definition: tx.c:388
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
sub
static float sub(float src0, float src1)
Definition: dnn_backend_native_layer_mathbinary.c:31
AV_CPU_FLAG_SSE3SLOW
#define AV_CPU_FLAG_SSE3SLOW
SSE3 supported, but usually not faster.
Definition: cpu.h:39
AVTXContext
Definition: tx_priv.h:202
ff_tx_null_def
static const FFTXCodelet ff_tx_null_def
Definition: tx.c:273
basis
static int16_t basis[64][64]
Definition: mpegvideo_enc.c:4167
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
FFTXCodeletOptions
Definition: tx_priv.h:161
ff_tx_gen_compound_mapping
int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
Definition: tx.c:43
b
#define b
Definition: input.c:41
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
AV_TX_DOUBLE_FFT
@ AV_TX_DOUBLE_FFT
Definition: tx.h:48
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
FF_TX_CPU_FLAGS_ALL
#define FF_TX_CPU_FLAGS_ALL
Definition: tx_priv.h:197
print_type
static void print_type(AVBPrint *bp, enum AVTXType type)
Definition: tx.c:319
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
cmp_matches
static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
Definition: tx.c:393
print_flags
static void print_flags(AVBPrint *bp, uint64_t f)
Definition: tx.c:293
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:651
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
av_gcd
int64_t av_gcd(int64_t a, int64_t b)
Compute the greatest common divisor of two integer operands.
Definition: mathematics.c:37
ff_tx_null_init
static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx.c:257
AV_BPRINT_SIZE_AUTOMATIC
#define AV_BPRINT_SIZE_AUTOMATIC
ff_tx_gen_split_radix_parity_revtab
int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int len, int inv, int inv_lookup, int basis, int dual_stride)
Definition: tx.c:209
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1389
TX_MAX_SUB
#define TX_MAX_SUB
Definition: tx_priv.h:167
FFTXCodelet::min_len
int min_len
Definition: tx_priv.h:181
FF_TX_ALIGNED
#define FF_TX_ALIGNED
Definition: tx_priv.h:144
FFTXCodelet::type
enum AVTXType type
Definition: tx_priv.h:172
FFDIFFSIGN
#define FFDIFFSIGN(x, y)
Comparator.
Definition: macros.h:45
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:58
avassert.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
AV_TX_NB
@ AV_TX_NB
Definition: tx.h:93
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:111
AV_TX_FLOAT_MDCT
@ AV_TX_FLOAT_MDCT
Standard MDCT with a sample data type of float, double or int32_t, respectively.
Definition: tx.h:68
av_fast_realloc
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given buffer if it is not large enough, otherwise do nothing.
Definition: mem.c:505
s
#define s(width, name)
Definition: cbs_vp9.c:256
print_cd_info
static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
Definition: tx.c:335
FFTXCodelet::flags
uint64_t flags
Definition: tx_priv.h:175
FFTXCodelet::prio
int prio
Definition: tx_priv.h:199
AV_TX_INT32_MDCT
@ AV_TX_INT32_MDCT
Definition: tx.h:70
AVTXContext::type
enum AVTXType type
Definition: tx_priv.h:223
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
FF_TX_FORWARD_ONLY
#define FF_TX_FORWARD_ONLY
Definition: tx_priv.h:147
AVTXContext::len
int len
Definition: tx_priv.h:205
AV_TX_FLOAT_FFT
@ AV_TX_FLOAT_FFT
Standard complex to complex FFT with sample data type of AVComplexFloat, AVComplexDouble or AVComplex...
Definition: tx.h:47
split_radix_permutation
static int split_radix_permutation(int i, int len, int inv)
Definition: tx.c:95
ctx
AVFormatContext * ctx
Definition: movenc.c:48
FFTXCodelet::cpu_flags
int cpu_flags
Definition: tx_priv.h:194
TX_TYPE_ANY
#define TX_TYPE_ANY
Definition: tx_priv.h:173
AV_TX_FULL_IMDCT
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
Definition: tx.h:136
reset_ctx
static void reset_ctx(AVTXContext *s)
Definition: tx.c:228
opts
AVDictionary * opts
Definition: movenc.c:50
TXCodeletMatch::cd
const FFTXCodelet * cd
Definition: tx.c:389
NULL
#define NULL
Definition: coverity.c:32
AV_TX_INPLACE
@ AV_TX_INPLACE
Performs an in-place transformation on the input.
Definition: tx.h:122
ff_tx_null
static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx.c:268
FF_TX_OUT_OF_PLACE
#define FF_TX_OUT_OF_PLACE
Definition: tx_priv.h:143
list
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining list
Definition: filter_design.txt:25
FF_TX_PRIO_MAX
@ FF_TX_PRIO_MAX
Definition: tx_priv.h:157
FFTXCodelet::init
int(* init)(AVTXContext *s, const struct FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx_priv.h:185
AV_TX_UNALIGNED
@ AV_TX_UNALIGNED
Relaxes alignment requirement for the in and out arrays of av_tx_fn().
Definition: tx.h:128
TXCodeletMatch::prio
int prio
Definition: tx.c:390
qsort.h
FF_TX_PRESHUFFLE
#define FF_TX_PRESHUFFLE
Definition: tx_priv.h:145
f
f
Definition: af_crystalizer.c:122
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:115
AV_CPU_FLAG_AVXSLOW
#define AV_CPU_FLAG_AVXSLOW
AVX supported, but slow when using YMM registers (e.g. Bulldozer)
Definition: cpu.h:48
cpu.h
AVTXType
AVTXType
Definition: tx.h:39
ff_tx_codelet_list_float_aarch64
const FFTXCodelet *const ff_tx_codelet_list_float_aarch64[]
Definition: tx_float_init.c:48
AV_CPU_FLAG_SSE2SLOW
#define AV_CPU_FLAG_SSE2SLOW
SSE2 supported, but usually not faster.
Definition: cpu.h:35
ff_tx_codelet_list_double_c
const FFTXCodelet *const ff_tx_codelet_list_double_c[]
AV_TX_INT32_FFT
@ AV_TX_INT32_FFT
Definition: tx.h:49
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
AVTXContext::inv
int inv
Definition: tx_priv.h:206
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:248
FFTXCodelet::max_len
int max_len
Definition: tx_priv.h:182
bprint.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
AV_QSORT
#define AV_QSORT(p, num, type, cmp)
Quicksort This sort is fast, and fully inplace but not stable and it is possible to construct input t...
Definition: qsort.h:33
ff_tx_gen_ptwo_revtab
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
Definition: tx.c:106
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_TX_DOUBLE_RDFT
@ AV_TX_DOUBLE_RDFT
Definition: tx.h:89
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:264
len
int len
Definition: vorbis_enc_data.h:426
TX_LEN_UNLIMITED
#define TX_LEN_UNLIMITED
Definition: tx_priv.h:183
stride
#define stride
Definition: h264pred_template.c:537
AV_CPU_FLAG_ATOM
#define AV_CPU_FLAG_ATOM
Atom processor, some SSSE3 instructions are slower.
Definition: cpu.h:43
tx_priv.h
AVTXContext::flags
uint64_t flags
Definition: tx_priv.h:224
ret
ret
Definition: filter_design.txt:187
ff_tx_init_subtx
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
Definition: tx.c:439
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
TX_FACTOR_ANY
#define TX_FACTOR_ANY
Definition: tx_priv.h:179
FF_TX_INVERSE_ONLY
#define FF_TX_INVERSE_ONLY
Definition: tx_priv.h:146
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:94
ff_tx_codelet_list_float_c
const FFTXCodelet *const ff_tx_codelet_list_float_c[]
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:88
FFTXCodelet
Definition: tx_priv.h:169
check_cd_factors
static int check_cd_factors(const FFTXCodelet *cd, int len)
Definition: tx.c:399
AV_TX_INT32_RDFT
@ AV_TX_INT32_RDFT
Definition: tx.h:90
AVTXContext::cd_self
const FFTXCodelet * cd_self
Definition: tx_priv.h:222
FFTXCodelet::name
const char * name
Definition: tx_priv.h:170
factor
static const int factor[16]
Definition: vf_pp7.c:76
FF_TX_ASM_CALL
#define FF_TX_ASM_CALL
Definition: tx_priv.h:148
ff_tx_codelet_list_float_x86
const FFTXCodelet *const ff_tx_codelet_list_float_x86[]
Definition: tx_float_init.c:177
print_tx_structure
static void print_tx_structure(AVTXContext *s, int depth)
Definition: tx.c:374
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_tx_null_list
static const FFTXCodelet *const ff_tx_null_list[]
Definition: tx.c:287
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
mulinv
static av_always_inline int mulinv(int n, int m)
Definition: tx.c:32
ff_tx_codelet_list_int32_c
const FFTXCodelet *const ff_tx_codelet_list_int32_c[]
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
FFTXCodelet::factors
int factors[TX_MAX_SUB]
Definition: tx_priv.h:178
TYPE_IS
#define TYPE_IS(type, x)
Definition: tx.c:26
FFTXCodelet::function
av_tx_fn function
Definition: tx_priv.h:171
parity_revtab_generator
static void parity_revtab_generator(int *revtab, int n, int inv, int offset, int is_dual, int dual_high, int len, int basis, int dual_stride, int inv_lookup)
Definition: tx.c:166