FFmpeg
ops_optimizer.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops.h"
26 #include "ops_internal.h"
27 
/* Evaluate `x` into the enclosing function's local `ret`; if the result is
 * negative, return it from the enclosing function immediately. */
#define RET(x)            \
    do {                  \
        ret = (x);        \
        if (ret < 0)      \
            return ret;   \
    } while (0)
33 
34 /**
35  * Try to commute a clear op with the next operation. Makes any adjustments
36  * to the operations as needed, but does not perform the actual commutation.
37  *
38  * Returns whether successful.
39  */
40 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42  SwsOp tmp;
43 
44  av_assert1(op->op == SWS_OP_CLEAR);
45  switch (next->op) {
46  case SWS_OP_CONVERT:
47  op->type = next->convert.to;
48  /* fall through */
49  case SWS_OP_LSHIFT:
50  case SWS_OP_RSHIFT:
51  case SWS_OP_DITHER:
52  case SWS_OP_MIN:
53  case SWS_OP_MAX:
54  case SWS_OP_SCALE:
55  case SWS_OP_READ:
56  case SWS_OP_SWIZZLE:
57  ff_sws_apply_op_q(next, op->c.q4);
58  return true;
59  case SWS_OP_SWAP_BYTES:
60  switch (next->type) {
61  case SWS_PIXEL_U16:
62  ff_sws_apply_op_q(next, op->c.q4); /* always works */
63  return true;
64  case SWS_PIXEL_U32:
65  for (int i = 0; i < 4; i++) {
66  uint32_t v = av_bswap32(op->c.q4[i].num);
67  if (v > INT_MAX)
68  return false; /* can't represent as AVRational anymore */
69  tmp.c.q4[i] = Q(v);
70  }
71  op->c = tmp.c;
72  return true;
73  default:
74  return false;
75  }
76  case SWS_OP_INVALID:
77  case SWS_OP_WRITE:
78  case SWS_OP_LINEAR:
79  case SWS_OP_PACK:
80  case SWS_OP_UNPACK:
81  case SWS_OP_CLEAR:
82  return false;
83  case SWS_OP_TYPE_NB:
84  break;
85  }
86 
87  av_unreachable("Invalid operation type!");
88  return false;
89 }
90 
91  /**
92  * Try to commute a swizzle op with the next operation. Makes any adjustments
93  * to the operations as needed, but does not perform the actual commutation.
94  *
95  * Returns whether successful.
96  */
97 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
98 {
99  bool seen[4] = {0};
100 
101  av_assert1(op->op == SWS_OP_SWIZZLE);
102  switch (next->op) {
103  case SWS_OP_CONVERT:
104  op->type = next->convert.to;
105  /* fall through */
106  case SWS_OP_SWAP_BYTES:
107  case SWS_OP_LSHIFT:
108  case SWS_OP_RSHIFT:
109  case SWS_OP_SCALE:
110  return true;
111 
112  /**
113  * We can commute per-channel ops only if the per-channel constants are the
114  * same for all duplicated channels; e.g.:
115  * SWIZZLE {0, 0, 0, 3}
116  * NEXT {x, x, x, w}
117  * ->
118  * NEXT {x, _, _, w}
119  * SWIZZLE {0, 0, 0, 3}
120  */
121  case SWS_OP_MIN:
122  case SWS_OP_MAX: {
123  const SwsConst c = next->c;
124  for (int i = 0; i < 4; i++) {
125  if (next->comps.unused[i])
126  continue;
127  const int j = op->swizzle.in[i];
128  if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129  return false;
130  next->c.q4[j] = c.q4[i];
131  seen[j] = true;
132  }
133  return true;
134  }
135 
136  case SWS_OP_DITHER: {
137  const SwsDitherOp d = next->dither;
138  for (int i = 0; i < 4; i++) {
139  if (next->comps.unused[i])
140  continue;
141  const int j = op->swizzle.in[i];
142  if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143  return false;
144  next->dither.y_offset[j] = d.y_offset[i];
145  seen[j] = true;
146  }
147  return true;
148  }
149 
150  case SWS_OP_INVALID:
151  case SWS_OP_READ:
152  case SWS_OP_WRITE:
153  case SWS_OP_SWIZZLE:
154  case SWS_OP_CLEAR:
155  case SWS_OP_LINEAR:
156  case SWS_OP_PACK:
157  case SWS_OP_UNPACK:
158  return false;
159  case SWS_OP_TYPE_NB:
160  break;
161  }
162 
163  av_unreachable("Invalid operation type!");
164  return false;
165 }
166 
/* Returns log2(x) when x is an exact power of two, and 0 for every other
 * input (including x <= 0). Note that x == 1 therefore also yields 0. */
static int exact_log2(const int x)
{
    int shift = 0;

    /* Reject non-positive values and anything with more than one bit set */
    if (x <= 0 || (x & (x - 1)))
        return 0;

    /* Exactly one bit is set; find its position */
    while ((x >> shift) > 1)
        shift++;
    return shift;
}
176 
177 static int exact_log2_q(const AVRational x)
178 {
179  if (x.den == 1)
180  return exact_log2(x.num);
181  else if (x.num == 1)
182  return -exact_log2(x.den);
183  else
184  return 0;
185 }
186 
187 /**
188  * If a linear operation can be reduced to a scalar multiplication, returns
189  * the corresponding scaling factor, or 0 otherwise.
190  */
191 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192  SwsConst *out_scale)
193 {
194  SwsConst scale = {0};
195 
196  /* There are components not on the main diagonal */
197  if (c->mask & ~SWS_MASK_DIAG4)
198  return false;
199 
200  for (int i = 0; i < 4; i++) {
201  const AVRational s = c->m[i][i];
202  if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
203  continue;
204  if (scale.q.den && av_cmp_q(s, scale.q))
205  return false;
206  scale.q = s;
207  }
208 
209  if (scale.q.den)
210  *out_scale = scale;
211  return scale.q.den;
212 }
213 
214 /* Extracts an integer clear operation (subset) from the given linear op. */
216  SwsConst *out_clear)
217 {
218  SwsConst clear = {0};
219  bool ret = false;
220 
221  for (int i = 0; i < 4; i++) {
222  bool const_row = c->m[i][4].den == 1; /* offset is integer */
223  for (int j = 0; j < 4; j++) {
224  const_row &= c->m[i][j].num == 0 || /* scalar is zero */
225  (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226  }
227  if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228  clear.q4[i] = c->m[i][4];
229  for (int j = 0; j < 5; j++)
230  c->m[i][j] = Q(i == j);
231  c->mask &= ~SWS_MASK_ROW(i);
232  ret = true;
233  }
234  }
235 
236  if (ret)
237  *out_clear = clear;
238  return ret;
239 }
240 
241 /* Unswizzle a linear operation by aligning single-input rows with
242  * their corresponding diagonal */
243 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
244 {
245  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246  SwsLinearOp c = *op;
247 
248  /* Find non-zero coefficients in the main 4x4 matrix */
249  uint32_t nonzero = 0;
250  for (int i = 0; i < 4; i++) {
251  for (int j = 0; j < 4; j++) {
252  if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
253  continue;
254  nonzero |= SWS_MASK(i, j);
255  }
256  }
257 
258  /* If a value is unique in its row and the target column is
259  * empty, move it there and update the input swizzle */
260  for (int i = 0; i < 4; i++) {
261  if (nonzero & SWS_MASK_COL(i))
262  continue; /* target column is not empty */
263  for (int j = 0; j < 4; j++) {
264  if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265  /* Move coefficient to the diagonal */
266  c.m[i][i] = c.m[i][j];
267  c.m[i][j] = Q(0);
268  swiz.in[i] = j;
269  break;
270  }
271  }
272  }
273 
274  if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275  return false; /* no swizzle was identified */
276 
277  c.mask = ff_sws_linear_mask(c);
278  *out_swiz = swiz;
279  *op = c;
280  return true;
281 }
282 
284 {
285  int ret;
286 
287 retry:
289 
290  /* Apply all in-place optimizations (that do not re-order the list) */
291  for (int n = 0; n < ops->num_ops; n++) {
292  SwsOp dummy = {0};
293  SwsOp *op = &ops->ops[n];
294  SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295  SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296 
297  /* common helper variable */
298  bool noop = true;
299 
300  if (next->comps.unused[0] && next->comps.unused[1] &&
301  next->comps.unused[2] && next->comps.unused[3])
302  {
303  /* Remove completely unused operations */
304  ff_sws_op_list_remove_at(ops, n, 1);
305  goto retry;
306  }
307 
308  switch (op->op) {
309  case SWS_OP_READ:
310  /* "Compress" planar reads where not all components are needed */
311  if (!op->rw.packed) {
312  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313  int nb_planes = 0;
314  for (int i = 0; i < op->rw.elems; i++) {
315  if (next->comps.unused[i]) {
316  swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317  continue;
318  }
319 
320  const int idx = nb_planes++;
321  av_assert1(idx <= i);
322  ops->order_src.in[idx] = ops->order_src.in[i];
323  swiz.in[i] = idx;
324  }
325 
326  if (nb_planes < op->rw.elems) {
327  op->rw.elems = nb_planes;
328  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329  .op = SWS_OP_SWIZZLE,
330  .type = op->type,
331  .swizzle = swiz,
332  }));
333  goto retry;
334  }
335  }
336  break;
337 
338  case SWS_OP_SWAP_BYTES:
339  /* Redundant (double) swap */
340  if (next->op == SWS_OP_SWAP_BYTES) {
341  ff_sws_op_list_remove_at(ops, n, 2);
342  goto retry;
343  }
344  break;
345 
346  case SWS_OP_UNPACK:
347  /* Redundant unpack+pack */
348  if (next->op == SWS_OP_PACK && next->type == op->type &&
349  next->pack.pattern[0] == op->pack.pattern[0] &&
350  next->pack.pattern[1] == op->pack.pattern[1] &&
351  next->pack.pattern[2] == op->pack.pattern[2] &&
352  next->pack.pattern[3] == op->pack.pattern[3])
353  {
354  ff_sws_op_list_remove_at(ops, n, 2);
355  goto retry;
356  }
357  break;
358 
359  case SWS_OP_LSHIFT:
360  case SWS_OP_RSHIFT:
361  /* Two shifts in the same direction */
362  if (next->op == op->op) {
363  op->c.u += next->c.u;
364  ff_sws_op_list_remove_at(ops, n + 1, 1);
365  goto retry;
366  }
367 
368  /* No-op shift */
369  if (!op->c.u) {
370  ff_sws_op_list_remove_at(ops, n, 1);
371  goto retry;
372  }
373  break;
374 
375  case SWS_OP_CLEAR:
376  for (int i = 0; i < 4; i++) {
377  if (!op->c.q4[i].den)
378  continue;
379 
380  if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381  !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382  op->c.q4[i].num == 0)
383  {
384  /* Redundant clear-to-zero of zero component */
385  op->c.q4[i].den = 0;
386  } else if (next->comps.unused[i]) {
387  /* Unnecessary clear of unused component */
388  op->c.q4[i] = (AVRational) {0, 0};
389  } else if (op->c.q4[i].den) {
390  noop = false;
391  }
392  }
393 
394  if (noop) {
395  ff_sws_op_list_remove_at(ops, n, 1);
396  goto retry;
397  }
398 
399  /* Transitive clear */
400  if (next->op == SWS_OP_CLEAR) {
401  for (int i = 0; i < 4; i++) {
402  if (next->c.q4[i].den)
403  op->c.q4[i] = next->c.q4[i];
404  }
405  ff_sws_op_list_remove_at(ops, n + 1, 1);
406  goto retry;
407  }
408  break;
409 
410  case SWS_OP_SWIZZLE:
411  for (int i = 0; i < 4; i++) {
412  if (next->comps.unused[i])
413  continue;
414  if (op->swizzle.in[i] != i)
415  noop = false;
416  }
417 
418  /* Identity swizzle */
419  if (noop) {
420  ff_sws_op_list_remove_at(ops, n, 1);
421  goto retry;
422  }
423 
424  /* Transitive swizzle */
425  if (next->op == SWS_OP_SWIZZLE) {
426  const SwsSwizzleOp orig = op->swizzle;
427  for (int i = 0; i < 4; i++)
428  op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429  ff_sws_op_list_remove_at(ops, n + 1, 1);
430  goto retry;
431  }
432 
433  /* Swizzle planes instead of components, if possible */
434  if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435  for (int dst = 0; dst < prev->rw.elems; dst++) {
436  const int src = op->swizzle.in[dst];
437  if (src > dst && src < prev->rw.elems) {
438  FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439  for (int i = dst; i < 4; i++) {
440  if (op->swizzle.in[i] == dst)
441  op->swizzle.in[i] = src;
442  else if (op->swizzle.in[i] == src)
443  op->swizzle.in[i] = dst;
444  }
445  goto retry;
446  }
447  }
448  }
449 
450  if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451  for (int dst = 0; dst < next->rw.elems; dst++) {
452  const int src = op->swizzle.in[dst];
453  if (src > dst && src < next->rw.elems) {
454  FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455  FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456  goto retry;
457  }
458  }
459  }
460  break;
461 
462  case SWS_OP_CONVERT:
463  /* No-op conversion */
464  if (op->type == op->convert.to) {
465  ff_sws_op_list_remove_at(ops, n, 1);
466  goto retry;
467  }
468 
469  /* Transitive conversion */
470  if (next->op == SWS_OP_CONVERT &&
471  op->convert.expand == next->convert.expand)
472  {
473  av_assert1(op->convert.to == next->type);
474  op->convert.to = next->convert.to;
475  ff_sws_op_list_remove_at(ops, n + 1, 1);
476  goto retry;
477  }
478 
479  /* Conversion followed by integer expansion */
480  if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481  ff_sws_pixel_type_is_int(op->type) &&
482  ff_sws_pixel_type_is_int(op->convert.to) &&
483  !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
484  {
485  op->convert.expand = true;
486  ff_sws_op_list_remove_at(ops, n + 1, 1);
487  goto retry;
488  }
489  break;
490 
491  case SWS_OP_MIN:
492  for (int i = 0; i < 4; i++) {
493  if (next->comps.unused[i] || !op->c.q4[i].den)
494  continue;
495  if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
496  noop = false;
497  }
498 
499  if (noop) {
500  ff_sws_op_list_remove_at(ops, n, 1);
501  goto retry;
502  }
503  break;
504 
505  case SWS_OP_MAX:
506  for (int i = 0; i < 4; i++) {
507  if (next->comps.unused[i] || !op->c.q4[i].den)
508  continue;
509  if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
510  noop = false;
511  }
512 
513  if (noop) {
514  ff_sws_op_list_remove_at(ops, n, 1);
515  goto retry;
516  }
517  break;
518 
519  case SWS_OP_DITHER:
520  for (int i = 0; i < 4; i++) {
521  if (next->comps.unused[i] || op->dither.y_offset[i] < 0)
522  continue;
523  if (prev->comps.flags[i] & SWS_COMP_EXACT) {
524  op->dither.y_offset[i] = -1; /* unnecessary dither */
525  goto retry;
526  } else {
527  noop = false;
528  }
529  }
530 
531  if (noop) {
532  ff_sws_op_list_remove_at(ops, n, 1);
533  goto retry;
534  }
535  break;
536 
537  case SWS_OP_LINEAR: {
538  SwsSwizzleOp swizzle;
539  SwsConst c;
540 
541  /* No-op (identity) linear operation */
542  if (!op->lin.mask) {
543  ff_sws_op_list_remove_at(ops, n, 1);
544  goto retry;
545  }
546 
547  if (next->op == SWS_OP_LINEAR) {
548  /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
549  const SwsLinearOp m1 = op->lin;
550  const SwsLinearOp m2 = next->lin;
551  for (int i = 0; i < 4; i++) {
552  for (int j = 0; j < 5; j++) {
553  AVRational sum = Q(0);
554  for (int k = 0; k < 4; k++)
555  sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
556  if (j == 4) /* m1.m[4][j] == 1 */
557  sum = av_add_q(sum, m2.m[i][4]);
558  op->lin.m[i][j] = sum;
559  }
560  }
561  op->lin.mask = ff_sws_linear_mask(op->lin);
562  ff_sws_op_list_remove_at(ops, n + 1, 1);
563  goto retry;
564  }
565 
566  /* Optimize away zero columns */
567  for (int j = 0; j < 4; j++) {
568  const uint32_t col = SWS_MASK_COL(j);
569  if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
570  continue;
571  for (int i = 0; i < 4; i++)
572  op->lin.m[i][j] = Q(i == j);
573  op->lin.mask &= ~col;
574  goto retry;
575  }
576 
577  /* Optimize away unused rows */
578  for (int i = 0; i < 4; i++) {
579  const uint32_t row = SWS_MASK_ROW(i);
580  if (!next->comps.unused[i] || !(op->lin.mask & row))
581  continue;
582  for (int j = 0; j < 5; j++)
583  op->lin.m[i][j] = Q(i == j);
584  op->lin.mask &= ~row;
585  goto retry;
586  }
587 
588  /* Convert constant rows to explicit clear instruction */
589  if (extract_constant_rows(&op->lin, prev->comps, &c)) {
590  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
591  .op = SWS_OP_CLEAR,
592  .type = op->type,
593  .comps = op->comps,
594  .c = c,
595  }));
596  goto retry;
597  }
598 
599  /* Multiplication by scalar constant */
600  if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
601  op->op = SWS_OP_SCALE;
602  op->c = c;
603  goto retry;
604  }
605 
606  /* Swizzle by fixed pattern */
607  if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
608  RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
609  .op = SWS_OP_SWIZZLE,
610  .type = op->type,
611  .swizzle = swizzle,
612  }));
613  goto retry;
614  }
615  break;
616  }
617 
618  case SWS_OP_SCALE: {
619  const int factor2 = exact_log2_q(op->c.q);
620 
621  /* No-op scaling */
622  if (op->c.q.num == 1 && op->c.q.den == 1) {
623  ff_sws_op_list_remove_at(ops, n, 1);
624  goto retry;
625  }
626 
627  /* Scaling by exact power of two */
628  if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
629  op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
630  op->c.u = FFABS(factor2);
631  goto retry;
632  }
633  break;
634  }
635  }
636  }
637 
638  /* Push clears to the back to void any unused components */
639  for (int n = 0; n < ops->num_ops - 1; n++) {
640  SwsOp *op = &ops->ops[n];
641  SwsOp *next = &ops->ops[n + 1];
642 
643  switch (op->op) {
644  case SWS_OP_CLEAR:
645  if (op_commute_clear(op, next)) {
646  FFSWAP(SwsOp, *op, *next);
647  goto retry;
648  }
649  break;
650  }
651  }
652 
653  /* Apply any remaining preferential re-ordering optimizations; do these
654  * last because they are more likely to block other optimizations if done
655  * too aggressively */
656  for (int n = 0; n < ops->num_ops - 1; n++) {
657  SwsOp *op = &ops->ops[n];
658  SwsOp *next = &ops->ops[n + 1];
659 
660  switch (op->op) {
661  case SWS_OP_SWIZZLE: {
662  /* Try to push swizzles towards the output */
663  if (op_commute_swizzle(op, next)) {
664  FFSWAP(SwsOp, *op, *next);
665  goto retry;
666  }
667  break;
668  }
669 
670  case SWS_OP_SCALE:
671  /* Scaling by integer before conversion to int */
672  if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
674  {
675  op->type = next->convert.to;
676  FFSWAP(SwsOp, *op, *next);
677  goto retry;
678  }
679  break;
680  }
681  }
682 
683  return 0;
684 }
685 
686 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
687  int size, uint8_t clear_val,
688  int *read_bytes, int *write_bytes)
689 {
690  if (!ops->num_ops)
691  return AVERROR(EINVAL);
692 
693  const SwsOp *read = ff_sws_op_list_input(ops);
694  if (!read || read->rw.frac || (!read->rw.packed && read->rw.elems > 1))
695  return AVERROR(ENOTSUP);
696 
697  const int read_size = ff_sws_pixel_type_size(read->type);
698  uint32_t mask[4] = {0};
699  for (int i = 0; i < read->rw.elems; i++)
700  mask[i] = 0x01010101 * i * read_size + 0x03020100;
701 
702  for (int opidx = 1; opidx < ops->num_ops; opidx++) {
703  const SwsOp *op = &ops->ops[opidx];
704  switch (op->op) {
705  case SWS_OP_SWIZZLE: {
706  uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
707  for (int i = 0; i < 4; i++)
708  mask[i] = orig[op->swizzle.in[i]];
709  break;
710  }
711 
712  case SWS_OP_SWAP_BYTES:
713  for (int i = 0; i < 4; i++) {
714  switch (ff_sws_pixel_type_size(op->type)) {
715  case 2: mask[i] = av_bswap16(mask[i]); break;
716  case 4: mask[i] = av_bswap32(mask[i]); break;
717  }
718  }
719  break;
720 
721  case SWS_OP_CLEAR:
722  for (int i = 0; i < 4; i++) {
723  if (!op->c.q4[i].den)
724  continue;
725  if (op->c.q4[i].num != 0 || !clear_val)
726  return AVERROR(ENOTSUP);
727  mask[i] = 0x1010101ul * clear_val;
728  }
729  break;
730 
731  case SWS_OP_CONVERT: {
732  if (!op->convert.expand)
733  return AVERROR(ENOTSUP);
734  for (int i = 0; i < 4; i++) {
735  switch (ff_sws_pixel_type_size(op->type)) {
736  case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
737  case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
738  }
739  }
740  break;
741  }
742 
743  case SWS_OP_WRITE: {
744  if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
745  return AVERROR(ENOTSUP);
746 
747  /* Initialize to no-op */
748  memset(shuffle, clear_val, size);
749 
750  const int write_size = ff_sws_pixel_type_size(op->type);
751  const int read_chunk = read->rw.elems * read_size;
752  const int write_chunk = op->rw.elems * write_size;
753  const int num_groups = size / FFMAX(read_chunk, write_chunk);
754  for (int n = 0; n < num_groups; n++) {
755  const int base_in = n * read_chunk;
756  const int base_out = n * write_chunk;
757  for (int i = 0; i < op->rw.elems; i++) {
758  const int offset = base_out + i * write_size;
759  for (int b = 0; b < write_size; b++) {
760  const uint8_t idx = mask[i] >> (b * 8);
761  if (idx != clear_val)
762  shuffle[offset + b] = base_in + idx;
763  }
764  }
765  }
766 
767  *read_bytes = num_groups * read_chunk;
768  *write_bytes = num_groups * write_chunk;
769  return num_groups;
770  }
771 
772  default:
773  return AVERROR(ENOTSUP);
774  }
775  }
776 
777  return AVERROR(EINVAL);
778 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
SwsSwizzleOp::mask
uint32_t mask
Definition: ops.h:128
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:570
SwsConst
Definition: ops.h:81
SWS_COMP_ZERO
@ SWS_COMP_ZERO
Definition: ops.h:77
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
ff_sws_linear_mask
uint32_t ff_sws_linear_mask(const SwsLinearOp c)
Definition: ops.c:657
SwsOp::swizzle
SwsSwizzleOp swizzle
Definition: ops.h:195
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:160
SwsComps::unused
bool unused[4]
Definition: ops.h:93
SwsOp::convert
SwsConvertOp convert
Definition: ops.h:196
rational.h
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:193
ops.h
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
b
#define b
Definition: input.c:42
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:283
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:69
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:63
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:166
SwsComps::max
AVRational max[4]
Definition: ops.h:97
SwsOpList::num_ops
int num_ops
Definition: ops.h:226
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:167
SwsDitherOp
Definition: ops.h:141
dummy
int dummy
Definition: motion.c:64
SwsOp::c
SwsConst c
Definition: ops.h:198
SwsSwizzleOp
Definition: ops.h:122
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:78
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsOp::op
SwsOpType op
Definition: ops.h:189
Q
#define Q(q)
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:134
SwsComps::min
AVRational min[4]
Definition: ops.h:97
read_chunk
static int read_chunk(AVFormatContext *s)
Definition: dhav.c:173
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
exact_log2_q
static int exact_log2_q(const AVRational x)
Definition: ops_optimizer.c:177
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:31
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:197
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SWS_COMP_GARBAGE
@ SWS_COMP_GARBAGE
Definition: ops.h:75
SwsConvertOp::to
SwsPixelType to
Definition: ops.h:137
ff_sws_op_list_remove_at
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
Definition: ops.c:588
RET
#define RET(x)
Copyright (C) 2025 Niklas Haas.
Definition: ops_optimizer.c:28
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:164
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_apply_op_q
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
Apply an operation to an AVRational.
Definition: ops.c:132
SwsConvertOp::expand
bool expand
Definition: ops.h:138
SwsOpList::order_dst
SwsSwizzleOp order_dst
Definition: ops.h:232
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:119
SwsConst::q
AVRational q
Definition: ops.h:84
extract_constant_rows
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, SwsConst *out_clear)
Definition: ops_optimizer.c:215
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
av_bswap32
#define av_bswap32
Definition: bswap.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:190
ff_sws_op_list_insert_at
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
Definition: ops.c:598
size
int size
Definition: twinvq_data.h:10344
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:180
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
SwsOp::lin
SwsLinearOp lin
Definition: ops.h:192
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:44
extract_scalar
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, SwsConst *out_scale)
If a linear operation can be reduced to a scalar multiplication, returns the corresponding scaling fa...
Definition: ops_optimizer.c:191
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:267
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:208
SwsLinearOp
Definition: ops.h:147
noop
#define noop(a)
Definition: h264chroma_template.c:71
extract_swizzle
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
Definition: ops_optimizer.c:243
SwsOpList::ops
SwsOp * ops
Definition: ops.h:225
SwsOpList::order_src
SwsSwizzleOp order_src
Definition: ops.h:232
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsConst::q4
AVRational q4[4]
Definition: ops.h:83
ops_internal.h
SwsOp
Definition: ops.h:188
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
SwsComps::flags
SwsCompFlags flags[4]
Definition: ops.h:92
ret
ret
Definition: filter_design.txt:187
bswap.h
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
op_commute_swizzle
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
Try to commute a swizzle op with the next operation.
Definition: ops_optimizer.c:97
SwsComps
Definition: ops.h:91
SwsConst::u
unsigned u
Definition: ops.h:85
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:103
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:686
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
SWS_COMP_EXACT
@ SWS_COMP_EXACT
Definition: ops.h:76
SwsReadWriteOp::elems
uint8_t elems
Definition: ops.h:101
SwsDitherOp::y_offset
int8_t y_offset[4]
Definition: ops.h:144
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
av_add_q
AVRational av_add_q(AVRational b, AVRational c)
Add two rationals.
Definition: rational.c:93
SwsSwizzleOp::in
uint8_t in[4]
Definition: ops.h:129
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
op_commute_clear
static bool op_commute_clear(SwsOp *op, SwsOp *next)
Try to commute a clear op with the next operation.
Definition: ops_optimizer.c:40
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:224
av_bswap16
#define av_bswap16
Definition: bswap.h:28
SwsOp::pack
SwsPackOp pack
Definition: ops.h:194
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
exact_log2
static int exact_log2(const int x)
Definition: ops_optimizer.c:168