FFmpeg
ops_optimizer.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops.h"
26 #include "ops_internal.h"
27 
/* Evaluate x into the caller's local `ret`; if the result is negative,
 * return it from the enclosing function immediately. */
#define RET(x)                                                                 \
    do {                                                                       \
        ret = (x);                                                             \
        if (ret < 0)                                                           \
            return ret;                                                        \
    } while (0)
33 
34 /**
35  * Try to commute a clear op with the next operation. Makes any adjustments
36  * to the operations as needed, but does not perform the actual commutation.
37  *
38  * Returns whether successful.
39  */
40 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42  av_assert1(op->op == SWS_OP_CLEAR);
43  switch (next->op) {
44  case SWS_OP_CONVERT:
45  op->type = next->convert.to;
46  /* fall through */
47  case SWS_OP_LSHIFT:
48  case SWS_OP_RSHIFT:
49  case SWS_OP_DITHER:
50  case SWS_OP_MIN:
51  case SWS_OP_MAX:
52  case SWS_OP_SCALE:
53  case SWS_OP_READ:
54  case SWS_OP_SWIZZLE:
55  ff_sws_apply_op_q(next, op->c.q4);
56  return true;
57  case SWS_OP_INVALID:
58  case SWS_OP_SWAP_BYTES:
59  case SWS_OP_WRITE:
60  case SWS_OP_LINEAR:
61  case SWS_OP_PACK:
62  case SWS_OP_UNPACK:
63  case SWS_OP_CLEAR:
64  return false;
65  case SWS_OP_TYPE_NB:
66  break;
67  }
68 
69  av_unreachable("Invalid operation type!");
70  return false;
71 }
72 
73  /**
74  * Try to commute a swizzle op with the next operation. Makes any adjustments
75  * to the operations as needed, but does not perform the actual commutation.
76  *
77  * Returns whether successful.
78  */
79 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
80 {
81  bool seen[4] = {0};
82 
84  switch (next->op) {
85  case SWS_OP_CONVERT:
86  op->type = next->convert.to;
87  /* fall through */
88  case SWS_OP_SWAP_BYTES:
89  case SWS_OP_LSHIFT:
90  case SWS_OP_RSHIFT:
91  case SWS_OP_SCALE:
92  return true;
93 
94  /**
95  * We can commute per-channel ops only if the per-channel constants are the
96  * same for all duplicated channels; e.g.:
97  * SWIZZLE {0, 0, 0, 3}
98  * NEXT {x, x, x, w}
99  * ->
100  * NEXT {x, _, _, w}
101  * SWIZZLE {0, 0, 0, 3}
102  */
103  case SWS_OP_MIN:
104  case SWS_OP_MAX: {
105  const SwsConst c = next->c;
106  for (int i = 0; i < 4; i++) {
107  if (next->comps.unused[i])
108  continue;
109  const int j = op->swizzle.in[i];
110  if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
111  return false;
112  next->c.q4[j] = c.q4[i];
113  seen[j] = true;
114  }
115  return true;
116  }
117 
118  case SWS_OP_DITHER: {
119  const SwsDitherOp d = next->dither;
120  for (int i = 0; i < 4; i++) {
121  if (next->comps.unused[i])
122  continue;
123  const int j = op->swizzle.in[i];
124  if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
125  return false;
126  next->dither.y_offset[j] = d.y_offset[i];
127  seen[j] = true;
128  }
129  return true;
130  }
131 
132  case SWS_OP_INVALID:
133  case SWS_OP_READ:
134  case SWS_OP_WRITE:
135  case SWS_OP_SWIZZLE:
136  case SWS_OP_CLEAR:
137  case SWS_OP_LINEAR:
138  case SWS_OP_PACK:
139  case SWS_OP_UNPACK:
140  return false;
141  case SWS_OP_TYPE_NB:
142  break;
143  }
144 
145  av_unreachable("Invalid operation type!");
146  return false;
147 }
148 
/* Returns log2(x) if x is a power of two, or 0 otherwise. Note that x == 1
 * also yields 0, so a zero result alone does not tell the two cases apart. */
static int exact_log2(const int x)
{
    int exponent = 0;

    /* Non-positive values and values with more than one bit set have no
     * exact integer logarithm */
    if (x <= 0 || (x & (x - 1)))
        return 0;

    /* Exactly one bit is set; count how far it sits from bit 0 */
    for (int v = x; v > 1; v >>= 1)
        exponent++;
    return exponent;
}
158 
159 static int exact_log2_q(const AVRational x)
160 {
161  if (x.den == 1)
162  return exact_log2(x.num);
163  else if (x.num == 1)
164  return -exact_log2(x.den);
165  else
166  return 0;
167 }
168 
169 /**
170  * If a linear operation can be reduced to a scalar multiplication, returns
171  * the corresponding scaling factor, or 0 otherwise.
172  */
173 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
174  SwsConst *out_scale)
175 {
176  SwsConst scale = {0};
177 
178  /* There are components not on the main diagonal */
179  if (c->mask & ~SWS_MASK_DIAG4)
180  return false;
181 
182  for (int i = 0; i < 4; i++) {
183  const AVRational s = c->m[i][i];
184  if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
185  continue;
186  if (scale.q.den && av_cmp_q(s, scale.q))
187  return false;
188  scale.q = s;
189  }
190 
191  if (scale.q.den)
192  *out_scale = scale;
193  return scale.q.den;
194 }
195 
196 /* Extracts an integer clear operation (subset) from the given linear op. */
198  SwsConst *out_clear)
199 {
200  SwsConst clear = {0};
201  bool ret = false;
202 
203  for (int i = 0; i < 4; i++) {
204  bool const_row = c->m[i][4].den == 1; /* offset is integer */
205  for (int j = 0; j < 4; j++) {
206  const_row &= c->m[i][j].num == 0 || /* scalar is zero */
207  (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
208  }
209  if (const_row && (c->mask & SWS_MASK_ROW(i))) {
210  clear.q4[i] = c->m[i][4];
211  for (int j = 0; j < 5; j++)
212  c->m[i][j] = Q(i == j);
213  c->mask &= ~SWS_MASK_ROW(i);
214  ret = true;
215  }
216  }
217 
218  if (ret)
219  *out_clear = clear;
220  return ret;
221 }
222 
223 /* Unswizzle a linear operation by aligning single-input rows with
224  * their corresponding diagonal */
225 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
226 {
227  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
228  SwsLinearOp c = *op;
229 
230  for (int i = 0; i < 4; i++) {
231  int idx = -1;
232  for (int j = 0; j < 4; j++) {
233  if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
234  continue;
235  if (idx >= 0)
236  return false; /* multiple inputs */
237  idx = j;
238  }
239 
240  if (idx >= 0 && idx != i) {
241  /* Move coefficient to the diagonal */
242  c.m[i][i] = c.m[i][idx];
243  c.m[i][idx] = Q(0);
244  swiz.in[i] = idx;
245  }
246  }
247 
248  if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
249  return false; /* no swizzle was identified */
250 
251  c.mask = ff_sws_linear_mask(c);
252  *out_swiz = swiz;
253  *op = c;
254  return true;
255 }
256 
258 {
259  int ret;
260 
261 retry:
263 
264  /* Apply all in-place optimizations (that do not re-order the list) */
265  for (int n = 0; n < ops->num_ops; n++) {
266  SwsOp dummy = {0};
267  SwsOp *op = &ops->ops[n];
268  SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
269  SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
270 
271  /* common helper variable */
272  bool noop = true;
273 
274  switch (op->op) {
275  case SWS_OP_READ:
276  /* Optimized further into refcopy / memcpy */
277  if (next->op == SWS_OP_WRITE &&
278  next->rw.elems == op->rw.elems &&
279  next->rw.packed == op->rw.packed &&
280  next->rw.frac == op->rw.frac)
281  {
282  ff_sws_op_list_remove_at(ops, n, 2);
283  av_assert1(ops->num_ops == 0);
284  return 0;
285  }
286 
287  /* Skip reading extra unneeded components */
288  if (!op->rw.packed) {
289  int needed = op->rw.elems;
290  while (needed > 0 && next->comps.unused[needed - 1])
291  needed--;
292  if (op->rw.elems != needed) {
293  op->rw.elems = needed;
294  goto retry;
295  }
296  }
297  break;
298 
299  case SWS_OP_SWAP_BYTES:
300  /* Redundant (double) swap */
301  if (next->op == SWS_OP_SWAP_BYTES) {
302  ff_sws_op_list_remove_at(ops, n, 2);
303  goto retry;
304  }
305  break;
306 
307  case SWS_OP_UNPACK:
308  /* Redundant unpack+pack */
309  if (next->op == SWS_OP_PACK && next->type == op->type &&
310  next->pack.pattern[0] == op->pack.pattern[0] &&
311  next->pack.pattern[1] == op->pack.pattern[1] &&
312  next->pack.pattern[2] == op->pack.pattern[2] &&
313  next->pack.pattern[3] == op->pack.pattern[3])
314  {
315  ff_sws_op_list_remove_at(ops, n, 2);
316  goto retry;
317  }
318  break;
319 
320  case SWS_OP_LSHIFT:
321  case SWS_OP_RSHIFT:
322  /* Two shifts in the same direction */
323  if (next->op == op->op) {
324  op->c.u += next->c.u;
325  ff_sws_op_list_remove_at(ops, n + 1, 1);
326  goto retry;
327  }
328 
329  /* No-op shift */
330  if (!op->c.u) {
331  ff_sws_op_list_remove_at(ops, n, 1);
332  goto retry;
333  }
334  break;
335 
336  case SWS_OP_CLEAR:
337  for (int i = 0; i < 4; i++) {
338  if (!op->c.q4[i].den)
339  continue;
340 
341  if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
342  !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
343  op->c.q4[i].num == 0)
344  {
345  /* Redundant clear-to-zero of zero component */
346  op->c.q4[i].den = 0;
347  } else if (next->comps.unused[i]) {
348  /* Unnecessary clear of unused component */
349  op->c.q4[i] = (AVRational) {0, 0};
350  } else if (op->c.q4[i].den) {
351  noop = false;
352  }
353  }
354 
355  if (noop) {
356  ff_sws_op_list_remove_at(ops, n, 1);
357  goto retry;
358  }
359 
360  /* Transitive clear */
361  if (next->op == SWS_OP_CLEAR) {
362  for (int i = 0; i < 4; i++) {
363  if (next->c.q4[i].den)
364  op->c.q4[i] = next->c.q4[i];
365  }
366  ff_sws_op_list_remove_at(ops, n + 1, 1);
367  goto retry;
368  }
369  break;
370 
371  case SWS_OP_SWIZZLE:
372  for (int i = 0; i < 4; i++) {
373  if (next->comps.unused[i])
374  continue;
375  if (op->swizzle.in[i] != i)
376  noop = false;
377  }
378 
379  /* Identity swizzle */
380  if (noop) {
381  ff_sws_op_list_remove_at(ops, n, 1);
382  goto retry;
383  }
384 
385  /* Transitive swizzle */
386  if (next->op == SWS_OP_SWIZZLE) {
387  const SwsSwizzleOp orig = op->swizzle;
388  for (int i = 0; i < 4; i++)
389  op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
390  ff_sws_op_list_remove_at(ops, n + 1, 1);
391  goto retry;
392  }
393  break;
394 
395  case SWS_OP_CONVERT:
396  /* No-op conversion */
397  if (op->type == op->convert.to) {
398  ff_sws_op_list_remove_at(ops, n, 1);
399  goto retry;
400  }
401 
402  /* Transitive conversion */
403  if (next->op == SWS_OP_CONVERT &&
404  op->convert.expand == next->convert.expand)
405  {
406  av_assert1(op->convert.to == next->type);
407  op->convert.to = next->convert.to;
408  ff_sws_op_list_remove_at(ops, n + 1, 1);
409  goto retry;
410  }
411 
412  /* Conversion followed by integer expansion */
413  if (next->op == SWS_OP_SCALE && !op->convert.expand &&
414  !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
415  {
416  op->convert.expand = true;
417  ff_sws_op_list_remove_at(ops, n + 1, 1);
418  goto retry;
419  }
420  break;
421 
422  case SWS_OP_MIN:
423  for (int i = 0; i < 4; i++) {
424  if (next->comps.unused[i] || !op->c.q4[i].den)
425  continue;
426  if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
427  noop = false;
428  }
429 
430  if (noop) {
431  ff_sws_op_list_remove_at(ops, n, 1);
432  goto retry;
433  }
434  break;
435 
436  case SWS_OP_MAX:
437  for (int i = 0; i < 4; i++) {
438  if (next->comps.unused[i] || !op->c.q4[i].den)
439  continue;
440  if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
441  noop = false;
442  }
443 
444  if (noop) {
445  ff_sws_op_list_remove_at(ops, n, 1);
446  goto retry;
447  }
448  break;
449 
450  case SWS_OP_DITHER:
451  for (int i = 0; i < 4; i++) {
452  noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
453  next->comps.unused[i];
454  }
455 
456  if (noop) {
457  ff_sws_op_list_remove_at(ops, n, 1);
458  goto retry;
459  }
460  break;
461 
462  case SWS_OP_LINEAR: {
463  SwsSwizzleOp swizzle;
464  SwsConst c;
465 
466  /* No-op (identity) linear operation */
467  if (!op->lin.mask) {
468  ff_sws_op_list_remove_at(ops, n, 1);
469  goto retry;
470  }
471 
472  if (next->op == SWS_OP_LINEAR) {
473  /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
474  const SwsLinearOp m1 = op->lin;
475  const SwsLinearOp m2 = next->lin;
476  for (int i = 0; i < 4; i++) {
477  for (int j = 0; j < 5; j++) {
478  AVRational sum = Q(0);
479  for (int k = 0; k < 4; k++)
480  sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
481  if (j == 4) /* m1.m[4][j] == 1 */
482  sum = av_add_q(sum, m2.m[i][4]);
483  op->lin.m[i][j] = sum;
484  }
485  }
486  op->lin.mask = ff_sws_linear_mask(op->lin);
487  ff_sws_op_list_remove_at(ops, n + 1, 1);
488  goto retry;
489  }
490 
491  /* Optimize away zero columns */
492  for (int j = 0; j < 4; j++) {
493  const uint32_t col = SWS_MASK_COL(j);
494  if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
495  continue;
496  for (int i = 0; i < 4; i++)
497  op->lin.m[i][j] = Q(i == j);
498  op->lin.mask &= ~col;
499  goto retry;
500  }
501 
502  /* Optimize away unused rows */
503  for (int i = 0; i < 4; i++) {
504  const uint32_t row = SWS_MASK_ROW(i);
505  if (!next->comps.unused[i] || !(op->lin.mask & row))
506  continue;
507  for (int j = 0; j < 5; j++)
508  op->lin.m[i][j] = Q(i == j);
509  op->lin.mask &= ~row;
510  goto retry;
511  }
512 
513  /* Convert constant rows to explicit clear instruction */
514  if (extract_constant_rows(&op->lin, prev->comps, &c)) {
515  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
516  .op = SWS_OP_CLEAR,
517  .type = op->type,
518  .comps = op->comps,
519  .c = c,
520  }));
521  goto retry;
522  }
523 
524  /* Multiplication by scalar constant */
525  if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
526  op->op = SWS_OP_SCALE;
527  op->c = c;
528  goto retry;
529  }
530 
531  /* Swizzle by fixed pattern */
532  if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
533  RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
534  .op = SWS_OP_SWIZZLE,
535  .type = op->type,
536  .swizzle = swizzle,
537  }));
538  goto retry;
539  }
540  break;
541  }
542 
543  case SWS_OP_SCALE: {
544  const int factor2 = exact_log2_q(op->c.q);
545 
546  /* No-op scaling */
547  if (op->c.q.num == 1 && op->c.q.den == 1) {
548  ff_sws_op_list_remove_at(ops, n, 1);
549  goto retry;
550  }
551 
552  /* Scaling by exact power of two */
553  if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
554  op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
555  op->c.u = FFABS(factor2);
556  goto retry;
557  }
558  break;
559  }
560  }
561  }
562 
563  /* Push clears to the back to void any unused components */
564  for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
565  SwsOp *op = &ops->ops[n];
566  SwsOp *next = &ops->ops[n + 1];
567 
568  switch (op->op) {
569  case SWS_OP_CLEAR:
570  if (op_commute_clear(op, next)) {
571  FFSWAP(SwsOp, *op, *next);
572  goto retry;
573  }
574  break;
575  }
576  }
577 
578  /* Apply any remaining preferential re-ordering optimizations; do these
579  * last because they are more likely to block other optimizations if done
580  * too aggressively */
581  for (int n = 1; n < ops->num_ops - 1; n++) { /* exclude READ/WRITE */
582  SwsOp *op = &ops->ops[n];
583  SwsOp *prev = &ops->ops[n - 1];
584  SwsOp *next = &ops->ops[n + 1];
585 
586  switch (op->op) {
587  case SWS_OP_SWIZZLE: {
588  bool seen[4] = {0};
589  bool has_duplicates = false;
590  for (int i = 0; i < 4; i++) {
591  if (next->comps.unused[i])
592  continue;
593  has_duplicates |= seen[op->swizzle.in[i]];
594  seen[op->swizzle.in[i]] = true;
595  }
596 
597  /* Try to push swizzles with duplicates towards the output */
598  if (has_duplicates && op_commute_swizzle(op, next)) {
599  FFSWAP(SwsOp, *op, *next);
600  goto retry;
601  }
602 
603  /* Move swizzle out of the way between two converts so that
604  * they may be merged */
605  if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) {
606  op->type = next->convert.to;
607  FFSWAP(SwsOp, *op, *next);
608  goto retry;
609  }
610  break;
611  }
612 
613  case SWS_OP_SCALE:
614  /* Scaling by integer before conversion to int */
615  if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
617  {
618  op->type = next->convert.to;
619  FFSWAP(SwsOp, *op, *next);
620  goto retry;
621  }
622  break;
623  }
624  }
625 
626  return 0;
627 }
628 
629 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
630  int size, uint8_t clear_val,
631  int *read_bytes, int *write_bytes)
632 {
633  const SwsOp read = ops->ops[0];
634  const int read_size = ff_sws_pixel_type_size(read.type);
635  uint32_t mask[4] = {0};
636 
637  if (!ops->num_ops || read.op != SWS_OP_READ)
638  return AVERROR(EINVAL);
639  if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
640  return AVERROR(ENOTSUP);
641 
642  for (int i = 0; i < read.rw.elems; i++)
643  mask[i] = 0x01010101 * i * read_size + 0x03020100;
644 
645  for (int opidx = 1; opidx < ops->num_ops; opidx++) {
646  const SwsOp *op = &ops->ops[opidx];
647  switch (op->op) {
648  case SWS_OP_SWIZZLE: {
649  uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
650  for (int i = 0; i < 4; i++)
651  mask[i] = orig[op->swizzle.in[i]];
652  break;
653  }
654 
655  case SWS_OP_SWAP_BYTES:
656  for (int i = 0; i < 4; i++) {
657  switch (ff_sws_pixel_type_size(op->type)) {
658  case 2: mask[i] = av_bswap16(mask[i]); break;
659  case 4: mask[i] = av_bswap32(mask[i]); break;
660  }
661  }
662  break;
663 
664  case SWS_OP_CLEAR:
665  for (int i = 0; i < 4; i++) {
666  if (!op->c.q4[i].den)
667  continue;
668  if (op->c.q4[i].num != 0 || !clear_val)
669  return AVERROR(ENOTSUP);
670  mask[i] = 0x1010101ul * clear_val;
671  }
672  break;
673 
674  case SWS_OP_CONVERT: {
675  if (!op->convert.expand)
676  return AVERROR(ENOTSUP);
677  for (int i = 0; i < 4; i++) {
678  switch (ff_sws_pixel_type_size(op->type)) {
679  case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
680  case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
681  }
682  }
683  break;
684  }
685 
686  case SWS_OP_WRITE: {
687  if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
688  return AVERROR(ENOTSUP);
689 
690  /* Initialize to no-op */
691  memset(shuffle, clear_val, size);
692 
693  const int write_size = ff_sws_pixel_type_size(op->type);
694  const int read_chunk = read.rw.elems * read_size;
695  const int write_chunk = op->rw.elems * write_size;
696  const int num_groups = size / FFMAX(read_chunk, write_chunk);
697  for (int n = 0; n < num_groups; n++) {
698  const int base_in = n * read_chunk;
699  const int base_out = n * write_chunk;
700  for (int i = 0; i < op->rw.elems; i++) {
701  const int offset = base_out + i * write_size;
702  for (int b = 0; b < write_size; b++) {
703  const uint8_t idx = mask[i] >> (b * 8);
704  if (idx != clear_val)
705  shuffle[offset + b] = base_in + idx;
706  }
707  }
708  }
709 
710  *read_bytes = num_groups * read_chunk;
711  *write_bytes = num_groups * write_chunk;
712  return num_groups;
713  }
714 
715  default:
716  return AVERROR(ENOTSUP);
717  }
718  }
719 
720  return AVERROR(EINVAL);
721 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SwsComps::flags
unsigned flags[4]
Definition: ops.h:90
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
SwsSwizzleOp::mask
uint32_t mask
Definition: ops.h:126
SwsConst
Definition: ops.h:79
SWS_COMP_ZERO
@ SWS_COMP_ZERO
Definition: ops.h:75
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
ff_sws_linear_mask
uint32_t ff_sws_linear_mask(const SwsLinearOp c)
Definition: ops.c:556
SwsOp::swizzle
SwsSwizzleOp swizzle
Definition: ops.h:193
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:158
SwsComps::unused
bool unused[4]
Definition: ops.h:91
SwsOp::convert
SwsConvertOp convert
Definition: ops.h:194
rational.h
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:191
ops.h
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
b
#define b
Definition: input.c:42
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:257
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:69
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:64
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:164
SwsComps::max
AVRational max[4]
Definition: ops.h:95
SwsOpList::num_ops
int num_ops
Definition: ops.h:231
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:165
SwsDitherOp
Definition: ops.h:139
dummy
int dummy
Definition: motion.c:64
SwsOp::c
SwsConst c
Definition: ops.h:196
SwsSwizzleOp
Definition: ops.h:120
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:79
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsOp::op
SwsOpType op
Definition: ops.h:187
Q
#define Q(q)
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
SwsDitherOp::y_offset
uint8_t y_offset[4]
Definition: ops.h:142
s
#define s(width, name)
Definition: cbs_vp9.c:198
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:132
SwsComps::min
AVRational min[4]
Definition: ops.h:95
read_chunk
static int read_chunk(AVFormatContext *s)
Definition: dhav.c:173
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
exact_log2_q
static int exact_log2_q(const AVRational x)
Definition: ops_optimizer.c:159
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:30
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:195
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:178
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:108
SwsReadWriteOp::frac
uint8_t frac
Definition: ops.h:100
SWS_COMP_GARBAGE
@ SWS_COMP_GARBAGE
Definition: ops.h:73
SwsConvertOp::to
SwsPixelType to
Definition: ops.h:135
ff_sws_op_list_remove_at
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
Definition: ops.c:516
RET
#define RET(x)
Copyright (C) 2025 Niklas Haas.
Definition: ops_optimizer.c:28
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_apply_op_q
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
Apply an operation to an AVRational.
Definition: ops.c:107
SwsConvertOp::expand
bool expand
Definition: ops.h:136
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:117
SwsConst::q
AVRational q
Definition: ops.h:82
extract_constant_rows
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, SwsConst *out_clear)
Definition: ops_optimizer.c:197
av_bswap32
#define av_bswap32
Definition: bswap.h:47
SwsOp::type
SwsPixelType type
Definition: ops.h:188
ff_sws_op_list_insert_at
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
Definition: ops.c:526
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
SwsOp::lin
SwsLinearOp lin
Definition: ops.h:190
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:44
extract_scalar
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, SwsConst *out_scale)
If a linear operation can be reduced to a scalar multiplication, returns the corresponding scaling fa...
Definition: ops_optimizer.c:173
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:224
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:213
SwsLinearOp
Definition: ops.h:145
noop
#define noop(a)
Definition: h264chroma_template.c:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
extract_swizzle
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
Definition: ops_optimizer.c:225
SwsOpList::ops
SwsOp * ops
Definition: ops.h:230
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:57
needed
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed
Definition: filter_design.txt:212
SwsConst::q4
AVRational q4[4]
Definition: ops.h:81
ops_internal.h
SwsOp
Definition: ops.h:186
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
ret
ret
Definition: filter_design.txt:187
bswap.h
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
op_commute_swizzle
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
Try to commute a swizzle op with the next operation.
Definition: ops_optimizer.c:79
SwsComps
Definition: ops.h:89
SwsConst::u
unsigned u
Definition: ops.h:83
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:101
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:629
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
SWS_COMP_EXACT
@ SWS_COMP_EXACT
Definition: ops.h:74
SwsReadWriteOp::elems
uint8_t elems
Definition: ops.h:99
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:273
av_add_q
AVRational av_add_q(AVRational b, AVRational c)
Add two rationals.
Definition: rational.c:93
SwsSwizzleOp::in
uint8_t in[4]
Definition: ops.h:127
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
op_commute_clear
static bool op_commute_clear(SwsOp *op, SwsOp *next)
Try to commute a clear op with the next operation.
Definition: ops_optimizer.c:40
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:229
av_bswap16
#define av_bswap16
Definition: bswap.h:28
SwsOp::pack
SwsPackOp pack
Definition: ops.h:192
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
exact_log2
static int exact_log2(const int x)
Definition: ops_optimizer.c:150