FFmpeg
ops_dispatch.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/refstruct.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "ops_dispatch.h"
31 
/**
 * Private state attached to a compiled operation pass (stored in
 * SwsPass.priv and released by op_pass_free()).
 *
 * NOTE(review): other members referenced elsewhere in this file (comp,
 * exec_base, exec_tail, planes_out, pixel_bits_in/out, tail_off_in/out,
 * tail_size_in/out, filter_size_h, memcpy_first/last) are not visible in
 * this listing; only the members shown below are described.
 */
32 typedef struct SwsOpPass {
/* Blocks per line: pass width rounded up to whole blocks (op_pass_setup) */
36  size_t num_blocks;
/* Number of input planes, derived from the read operation in compile() */
41  int planes_in;
/* Frame plane index used for each pass plane; -1 for unused slots */
45  int idx_in[4];
46  int idx_out[4];
/* Output row -> source row table; only set for vertically filtered reads,
 * NULL otherwise (get_row_data() then uses the output row directly) */
47  int *offsets_y;
/* Set by op_pass_setup() when an output plane cannot absorb the kernel's
 * over-write, so the tail has to be written via the tail buffer */
51  bool memcpy_out;
/* Trailing blocks per line that are processed separately from the main loop */
52  size_t tail_blocks;
53  uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
/* Size bookkeeping for tail_buf, maintained by av_fast_mallocz() */
54  unsigned int tail_buf_size;
55 } SwsOpPass;
56 
/**
 * Try to compile an operation list with one specific backend.
 *
 * On success the compiled result is stored in *out and a summary is logged
 * at verbose level. On failure the backend's error code is returned; an
 * ENOTSUP failure is logged at trace level, any other at error level.
 *
 * NOTE(review): the statements that initialize `copy`, the statement
 * following the "Ensure these are always set" comment, and the statements
 * around the `fail:` label are not visible in this listing.
 */
57 static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
58  const SwsOpList *ops, SwsCompiledOp *out)
59 {
60  SwsOpList *copy;
61  SwsCompiledOp compiled = {0};
62  int ret = 0;
63 
/* The backend receives a private, mutable list rather than the const input;
 * presumably `copy` is a duplicate of `ops` (assignment not shown) */
65  if (!copy)
66  return AVERROR(ENOMEM);
67 
68  /* Ensure these are always set during compilation */
70 
71  ret = backend->compile(ctx, copy, &compiled);
72  if (ret < 0) {
/* "Not supported" is an expected outcome while probing backends, so it is
 * only reported at trace level */
73  int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
74  av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
75  backend->name, av_err2str(ret));
76  goto fail;
77  }
78 
/* Only publish the result once compilation has fully succeeded */
79  *out = compiled;
80 
81  av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
82  "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
83  backend->name, out->block_size, out->over_read, out->over_write,
84  out->cpu_flags);
85 
87 
88 fail:
90  return ret;
91 }
92 
94  const SwsOpList *ops, SwsCompiledOp *out)
95 {
/* NOTE(review): the first line of this signature is not visible in this
 * listing; judging by the call in compile(), this is ff_sws_ops_compile().
 *
 * With an explicit backend, only that backend is tried and its result is
 * returned as-is. */
96  if (backend)
97  return compile_backend(ctx, backend, ops, out);
98 
/* Otherwise walk the NULL-terminated backend table in order and return on
 * the first backend that compiles successfully */
99  for (int n = 0; ff_sws_op_backends[n]; n++) {
100  const SwsOpBackend *backend = ff_sws_op_backends[n];
/* Skip backends whose hardware format differs from the source or the
 * destination hardware format */
101  if (ops->src.hw_format != backend->hw_format ||
102  ops->dst.hw_format != backend->hw_format)
103  continue;
/* Individual backend errors are not propagated; the next one is tried */
104  if (compile_backend(ctx, backend, ops, out) < 0)
105  continue;
106 
107  return 0;
108  }
109 
/* No backend accepted the list */
110  return AVERROR(ENOTSUP);
111 }
112 
114 {
/* NOTE(review): the signature line is not visible in this listing; callers
 * in this file pass a SwsCompiledOp pointer as `comp`.
 *
 * Run the optional destructor on the private data */
115  if (comp->free)
116  comp->free(comp->priv);
117 
/* Reset every field so the structure no longer refers to the released data */
118  *comp = (SwsCompiledOp) {0};
119 }
120 
121 static void op_pass_free(void *ptr)
122 {
123  SwsOpPass *p = ptr;
124  if (!p)
125  return;
126 
127  ff_sws_compiled_op_unref(&p->comp);
128  av_refstruct_unref(&p->offsets_y);
129  av_free(p->exec_base.in_bump_y);
130  av_free(p->exec_base.in_offset_x);
131  av_free(p->tail_buf);
132  av_free(p);
133 }
134 
135 static inline void get_row_data(const SwsOpPass *p, const int y_dst,
136  const uint8_t *in[4], uint8_t *out[4])
137 {
138  const SwsOpExec *base = &p->exec_base;
139  const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
140  for (int i = 0; i < p->planes_in; i++)
141  in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
142  for (int i = 0; i < p->planes_out; i++)
143  out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
144 }
145 
146 static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
147  enum AVRounding rounding)
148 {
149  const uint64_t bits = (uint64_t) pixels * pixel_bits;
150  switch (rounding) {
151  case AV_ROUND_ZERO:
152  case AV_ROUND_DOWN:
153  return bits >> 3;
154  case AV_ROUND_INF:
155  case AV_ROUND_UP:
156  return (bits + 7) >> 3;
157  default:
158  av_unreachable("Invalid rounding mode");
159  return (size_t) -1;
160  }
161 }
162 
/**
 * Number of bytes at the start of a line that can be accessed while still
 * leaving `plane_pad` bytes of slack before the end of the line.
 *
 * @param linesize  line size in bytes; may be negative (only its magnitude is
 *                  used) and is asserted to be non-zero
 * @param plane_pad number of bytes the kernel may access past its nominal end
 * @return |linesize| - plane_pad, clamped to zero
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);

    /* 64-bit arithmetic so that negating or subtracting cannot overflow */
    const int64_t usable = FFABS((int64_t) linesize) - plane_pad;
    if (usable <= 0)
        return 0;

    return usable;
}
169 
/**
 * Determine how many leading blocks only use input offsets that do not
 * exceed `safe_offset`.
 *
 * Starting from `num_blocks`, trailing blocks are dropped for as long as the
 * offset of the last pixel of the last remaining block is greater than
 * `safe_offset`. Only that last entry of each block is inspected.
 *
 * @param num_blocks   total number of blocks per line
 * @param block_size   pixels per block
 * @param safe_offset  largest byte offset considered safe
 * @param offset_bytes per-pixel byte offsets, at least
 *                     num_blocks * block_size entries
 * @return number of leading blocks that are safe (possibly 0)
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    for (size_t blocks = num_blocks; blocks > 0; blocks--) {
        const size_t last_pixel = blocks * block_size - 1;
        if (offset_bytes[last_pixel] <= safe_offset)
            return blocks;
    }

    return 0;
}
179 
/**
 * Prepare the execution contexts of an operation pass for a concrete pair
 * of input / output frames.
 *
 * Fills p->exec_base with the plane pointers, strides and per-line pointer
 * bumps, then determines how many blocks per line can be processed directly
 * given the kernel's over-read / over-write and the available line sizes.
 * If some trailing blocks are not safe, p->exec_tail and p->tail_buf are set
 * up so that op_pass_run() can process that tail through a padded scratch
 * buffer.
 *
 * NOTE(review): presumably registered as the pass setup callback in
 * compile(); the registering line is not visible in this listing.
 *
 * @return 0 on success, AVERROR(EINVAL) if the block-aligned width
 *         overflows, AVERROR(ENOMEM) if the tail buffer cannot be allocated
 */
180 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
181  const SwsPass *pass)
182 {
183  const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
184  const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
185  const bool float_in = indesc->flags & AV_PIX_FMT_FLAG_FLOAT;
186 
187  SwsOpPass *p = pass->priv;
188  SwsOpExec *exec = &p->exec_base;
189  const SwsCompiledOp *comp = &p->comp;
190 
191  /* Set up main loop parameters */
192  const unsigned block_size = comp->block_size;
193  const size_t num_blocks = (pass->width + block_size - 1) / block_size;
194  const size_t aligned_w = num_blocks * block_size;
195  if (aligned_w < pass->width) /* overflow */
196  return AVERROR(EINVAL);
197  p->num_blocks = num_blocks;
198  p->memcpy_first = false;
199  p->memcpy_last = false;
200  p->memcpy_out = false;
201 
/* safe_blocks ends up as the minimum, over all planes that need special
 * handling, of the number of blocks that can be processed in place */
202  size_t safe_blocks = num_blocks;
203  for (int i = 0; i < p->planes_in; i++) {
204  int idx = p->idx_in[i];
/* Frame planes 1 and 2 use the chroma subsampling shifts */
205  int chroma = idx == 1 || idx == 2;
206  int sub_x = chroma ? indesc->log2_chroma_w : 0;
207  int sub_y = chroma ? indesc->log2_chroma_h : 0;
208 
209  size_t input_bytes = in->linesize[idx];
210  if (p->filter_size_h && float_in) {
211  /* Floating point inputs may contain NaN / Infinity in the padding */
212  const int plane_w = AV_CEIL_RSHIFT(in->width, sub_x);
213  input_bytes = pixel_bytes(plane_w, p->pixel_bits_in, AV_ROUND_UP);
214  }
215 
216  size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read);
217  size_t safe_blocks_in;
218  if (exec->in_offset_x) {
/* With an offset map, a block is safe while the offset of its last pixel
 * plus the filter footprint stays within the safe byte range */
219  size_t filter_size = pixel_bytes(p->filter_size_h, p->pixel_bits_in,
220  AV_ROUND_UP);
221  safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
222  safe_bytes - filter_size,
223  exec->in_offset_x);
224  } else {
225  safe_blocks_in = safe_bytes / exec->block_size_in;
226  }
227 
228  if (safe_blocks_in < num_blocks) {
/* A negative linesize marks the first line as the critical one, a positive
 * linesize the last line (see the safety notes in op_pass_run()) */
229  p->memcpy_first |= in->linesize[idx] < 0;
230  p->memcpy_last |= in->linesize[idx] > 0;
231  safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
232  }
233 
/* The bump is the distance from the end of the processed part of a line to
 * the start of the next line */
234  size_t loop_size = num_blocks * exec->block_size_in;
235  exec->in[i] = in->data[idx];
236  exec->in_stride[i] = in->linesize[idx];
237  exec->in_bump[i] = in->linesize[idx] - loop_size;
238  exec->in_sub_y[i] = sub_y;
239  exec->in_sub_x[i] = sub_x;
240  }
241 
242  for (int i = 0; i < p->planes_out; i++) {
243  int idx = p->idx_out[i];
244  int chroma = idx == 1 || idx == 2;
245  int sub_x = chroma ? outdesc->log2_chroma_w : 0;
246  int sub_y = chroma ? outdesc->log2_chroma_h : 0;
247  size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
248  size_t safe_blocks_out = safe_bytes / exec->block_size_out;
249  if (safe_blocks_out < num_blocks) {
250  p->memcpy_out = true;
251  safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
252  }
253 
254  size_t loop_size = num_blocks * exec->block_size_out;
255  exec->out[i] = out->data[idx];
256  exec->out_stride[i] = out->linesize[idx];
257  exec->out_bump[i] = out->linesize[idx] - loop_size;
258  exec->out_sub_y[i] = sub_y;
259  exec->out_sub_x[i] = sub_x;
260  }
261 
/* Nothing needs a scratch buffer: every block can be processed in place */
262  const bool memcpy_in = p->memcpy_first || p->memcpy_last;
263  if (!memcpy_in && !p->memcpy_out) {
264  av_assert0(safe_blocks == num_blocks);
265  return 0;
266  }
267 
268  /* Set-up tail section parameters and buffers */
269  SwsOpExec *tail = &p->exec_tail;
270  const int align = av_cpu_max_align();
271  size_t alloc_size = 0;
/* The tail context starts as a copy of the base context; only the fields
 * that differ are overridden below */
272  *tail = *exec;
273 
/* Byte offset of the tail within a line, and the number of valid bytes it
 * covers, for the output side */
274  const size_t safe_width = safe_blocks * block_size;
275  const size_t tail_size = pass->width - safe_width;
276  p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
277  p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
278  p->tail_blocks = num_blocks - safe_blocks;
279 
280  if (exec->in_offset_x) {
/* With an offset map the input range of the tail spans from the offset of
 * its first pixel to the offset of the last pixel plus the filter size */
281  p->tail_off_in = exec->in_offset_x[safe_width];
282  p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
283  p->tail_size_in += pixel_bytes(p->filter_size_h, p->pixel_bits_in, AV_ROUND_UP);
284  } else {
285  p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
286  p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
287  }
288 
/* First pass over the planes: compute scratch strides and the total
 * allocation size (one scratch line per frame line) */
289  const size_t alloc_width = aligned_w - safe_width;
290  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
291  size_t needed_size;
292  if (exec->in_offset_x) {
293  /* The input offset map is already padded to multiples of the block
294  * size, and clamps the input offsets to the image boundaries; so
295  * we just need to compensate for the comp->over_read */
296  needed_size = p->tail_size_in;
297  } else {
298  needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
299  }
300  size_t loop_size = p->tail_blocks * exec->block_size_in;
301  tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
302  tail->in_bump[i] = tail->in_stride[i] - loop_size;
303  alloc_size += tail->in_stride[i] * in->height;
304  }
305 
306  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
307  size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
308  size_t loop_size = p->tail_blocks * exec->block_size_out;
309  tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
310  tail->out_bump[i] = tail->out_stride[i] - loop_size;
311  alloc_size += tail->out_stride[i] * out->height;
312  }
313 
314  if (memcpy_in && exec->in_offset_x) {
315  /* `in_offset_x` is indexed relative to the line start, not the start
316  * of the section being processed; so we need to over-allocate this
317  * array to the full width of the image, even though we will only
318  * partially fill in the offsets relevant to the tail region */
319  alloc_size += aligned_w * sizeof(*exec->in_offset_x);
320  }
321 
322  av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
323  if (!p->tail_buf)
324  return AVERROR(ENOMEM);
325 
/* Second pass: carve the single allocation into input planes, output
 * planes and (optionally) the rebased offset map, in that order */
326  uint8_t *tail_buf = p->tail_buf;
327  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
328  tail->in[i] = tail_buf;
329  tail_buf += tail->in_stride[i] * in->height;
330  }
331 
332  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
333  tail->out[i] = tail_buf;
334  tail_buf += tail->out_stride[i] * out->height;
335  }
336 
337  if (memcpy_in && exec->in_offset_x) {
/* Offsets of the tail region are rebased so that they are relative to the
 * start of the copied tail data instead of the start of the line */
338  tail->in_offset_x = (int32_t *) tail_buf;
339  for (int i = safe_width; i < aligned_w; i++)
340  tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
341  }
342 
343  return 0;
344 }
345 
/**
 * Copy a rectangle of bytes between two buffers with independent strides.
 *
 * Strides are signed because callers pass plane strides, which are negative
 * for vertically flipped images. Each row address is computed from the base
 * pointer, so no pointer is ever advanced past the last row that is copied.
 *
 * @param dst        first destination row
 * @param dst_stride distance in bytes between destination rows (may be < 0)
 * @param src        first source row
 * @param src_stride distance in bytes between source rows (may be < 0)
 * @param h          number of rows; nothing is copied when h <= 0
 * @param bytes      number of bytes copied per row
 */
static void copy_lines(uint8_t *dst, const ptrdiff_t dst_stride,
                       const uint8_t *src, const ptrdiff_t src_stride,
                       const int h, const size_t bytes)
{
    for (int y = 0; y < h; y++)
        memcpy(dst + y * dst_stride, src + y * src_stride, bytes);
}
356 
357 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
358  const int h, const SwsPass *pass)
359 {
360  const SwsOpPass *p = pass->priv;
361  const SwsCompiledOp *comp = &p->comp;
362 
363  /* Fill exec metadata for this slice */
364  DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
365  exec.slice_y = y;
366  exec.slice_h = h;
367 
368  /**
369  * To ensure safety, we need to consider the following:
370  *
371  * 1. We can overread the input, unless this is the last line of an
372  * unpadded buffer. All defined operations can handle arbitrary pixel
373  * input, so overread of arbitrary data is fine. For flipped images,
374  * this condition is actually *inverted* to where the first line is
375  * the one at the end of the buffer.
376  *
377  * 2. We can overwrite the output, as long as we don't write more than the
378  * amount of pixels that fit into one linesize. So we always need to
379  * memcpy the last column on the output side if unpadded.
380  */
381 
382  const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
383  p->memcpy_first && y == 0;
384  const bool memcpy_out = p->memcpy_out;
385  const size_t num_blocks = p->num_blocks;
386  const size_t tail_blocks = p->tail_blocks;
387 
388  get_row_data(p, y, exec.in, exec.out);
389  if (!memcpy_in && !memcpy_out) {
390  /* Fast path (fully aligned/padded inputs and outputs) */
391  comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
392  return;
393  }
394 
395  /* Non-aligned case (slow path); process main blocks as normal, and
396  * a separate tail (via memcpy into an appropriately padded buffer) */
397  if (num_blocks > tail_blocks) {
398  for (int i = 0; i < 4; i++) {
399  /* We process fewer blocks, so the in_bump needs to be increased
400  * to reflect that the plane pointers are left on the last block,
401  * not the end of the processed line, after each loop iteration */
402  exec.in_bump[i] += exec.block_size_in * tail_blocks;
403  exec.out_bump[i] += exec.block_size_out * tail_blocks;
404  }
405 
406  comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
407  }
408 
409  DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
410  tail.slice_y = y;
411  tail.slice_h = h;
412 
413  for (int i = 0; i < p->planes_in; i++) {
414  /* Input offsets are relative to the base pointer */
415  if (!exec.in_offset_x || memcpy_in)
416  exec.in[i] += p->tail_off_in;
417  tail.in[i] += y * tail.in_stride[i];
418  }
419  for (int i = 0; i < p->planes_out; i++) {
420  exec.out[i] += p->tail_off_out;
421  tail.out[i] += y * tail.out_stride[i];
422  }
423 
424  for (int i = 0; i < p->planes_in; i++) {
425  if (memcpy_in) {
426  copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
427  exec.in[i], exec.in_stride[i], h, p->tail_size_in);
428  } else {
429  /* Reuse input pointers directly */
430  const size_t loop_size = tail_blocks * exec.block_size_in;
431  tail.in[i] = exec.in[i];
432  tail.in_stride[i] = exec.in_stride[i];
433  tail.in_bump[i] = exec.in_stride[i] - loop_size;
434  }
435  }
436 
437  for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
438  /* Reuse output pointers directly */
439  const size_t loop_size = tail_blocks * exec.block_size_out;
440  tail.out[i] = exec.out[i];
441  tail.out_stride[i] = exec.out_stride[i];
442  tail.out_bump[i] = exec.out_stride[i] - loop_size;
443  }
444 
445  /* Dispatch kernel over tail */
446  av_assert1(tail_blocks > 0);
447  comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
448 
449  for (int i = 0; memcpy_out && i < p->planes_out; i++) {
450  copy_lines(exec.out[i], exec.out_stride[i],
451  tail.out[i], tail.out_stride[i], h, p->tail_size_out);
452  }
453 }
454 
455 static int rw_planes(const SwsOp *op)
456 {
457  return op->rw.packed ? 1 : op->rw.elems;
458 }
459 
460 static int rw_pixel_bits(const SwsOp *op)
461 {
462  const int elems = op->rw.packed ? op->rw.elems : 1;
463  const int size = ff_sws_pixel_type_size(op->type);
464  const int bits = 8 >> op->rw.frac;
465  av_assert1(bits >= 1);
466  return elems * size * bits;
467 }
468 
469 static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
470 {
471  if (!pass)
472  return;
473 
474  /* Add at least as many pixels as needed to cover the padding requirement */
475  const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
476 
477  SwsPassBuffer *buf = pass->output;
478  buf->width_align = FFMAX(buf->width_align, block_size);
479  buf->width_pad = FFMAX(buf->width_pad, pad);
480 }
481 
/**
 * Compile an operation list with the given backend (or any backend when
 * `backend` is NULL) and append the resulting pass to the graph.
 *
 * When `output` is NULL the list is only test-compiled and nothing is added
 * to the graph. Opaque compiled operations are added directly with their own
 * callback; all others get a SwsOpPass whose plane layout, block sizes and
 * filter tables are derived from the list's read and write operations.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
482 static int compile(SwsGraph *graph, const SwsOpBackend *backend,
483  const SwsOpList *ops, SwsPass *input, SwsPass **output)
484 {
485  SwsContext *ctx = graph->ctx;
486  SwsOpPass *p = av_mallocz(sizeof(*p));
487  if (!p)
488  return AVERROR(ENOMEM);
489 
490  int ret = ff_sws_ops_compile(ctx, backend, ops, &p->comp);
491  if (ret < 0)
492  goto fail;
493  else if (!output)
494  goto fail; /* nothing to do, just return */
495 
496  const SwsCompiledOp *comp = &p->comp;
497  const SwsFormat *dst = &ops->dst;
498  if (p->comp.opaque) {
/* Opaque operations do not use SwsOpPass; the compiled op is copied out
 * and its own private data and destructor are handed to the graph */
499  SwsCompiledOp c = *comp;
500  av_free(p);
501  return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
502  input, c.slice_align, c.func_opaque,
503  NULL, c.priv, c.free, output);
504  }
505 
506  const SwsOp *read = ff_sws_op_list_input(ops);
507  const SwsOp *write = ff_sws_op_list_output(ops);
508  p->planes_in = rw_planes(read);
509  p->planes_out = rw_planes(write);
510  p->pixel_bits_in = rw_pixel_bits(read);
511  p->pixel_bits_out = rw_pixel_bits(write);
512  p->exec_base = (SwsOpExec) {
513  .width = dst->width,
514  .height = dst->height,
515  };
516 
/* A block must cover a whole number of bytes on both sides, otherwise the
 * per-block byte sizes below would be inexact */
517  const int64_t block_bits_in = (int64_t) comp->block_size * p->pixel_bits_in;
518  const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
519  if (block_bits_in & 0x7 || block_bits_out & 0x7) {
520  av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
521  ret = AVERROR(EINVAL);
522  goto fail;
523  }
524 
525  p->exec_base.block_size_in = block_bits_in >> 3;
526  p->exec_base.block_size_out = block_bits_out >> 3;
527 
/* Map pass planes to frame planes; unused slots are marked with -1 */
528  for (int i = 0; i < 4; i++) {
529  p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1;
530  p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
531  }
532 
533  const SwsFilterWeights *filter = read->rw.kernel;
534  if (read->rw.filter == SWS_OP_FILTER_V) {
/* Vertical filter: keep a reference to the source row table used by
 * get_row_data() */
535  p->offsets_y = av_refstruct_ref(filter->offsets);
536 
537  /* Compute relative pointer bumps for each output line */
538  int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
539  if (!bump) {
540  ret = AVERROR(ENOMEM);
541  goto fail;
542  }
543 
/* bump[y] is the number of extra source rows to skip when advancing from
 * output row y to y + 1, beyond the one row implied by a normal step */
544  int line = filter->offsets[0];
545  for (int y = 0; y < filter->dst_size - 1; y++) {
546  int next = filter->offsets[y + 1];
547  bump[y] = next - line - 1;
548  line = next;
549  }
550  bump[filter->dst_size - 1] = 0;
551  p->exec_base.in_bump_y = bump;
552  } else if (read->rw.filter == SWS_OP_FILTER_H) {
553  /* Compute pixel offset map for each output line */
554  const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
555  int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
556  if (!offset) {
557  ret = AVERROR(ENOMEM);
558  goto fail;
559  }
/* Ownership moves to the pass state here, so the error paths below release
 * the array through op_pass_free() */
560  p->exec_base.in_offset_x = offset;
561 
562  for (int x = 0; x < filter->dst_size; x++) {
563  /* Sanity check; if the tap would land on a half-pixel, we cannot
564  * reasonably expect the implementation to know about this. Just
565  * error out in such (theoretical) cases. */
566  int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
567  if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
568  ret = AVERROR(EINVAL);
569  goto fail;
570  }
571  offset[x] = bits >> 3;
572  }
/* Pad the map up to a whole number of blocks by repeating the last offset */
573  for (int x = filter->dst_size; x < pixels; x++)
574  offset[x] = offset[filter->dst_size - 1];
575  p->exec_base.block_size_in = 0; /* ptr does not advance */
576  p->filter_size_h = filter->filter_size;
577  }
578 
579  ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
580  input, comp->slice_align, op_pass_run,
/* NOTE(review): the remaining arguments of this call are not visible in
 * this listing. The early return below does not free `p`; confirm that
 * ff_sws_graph_add_pass() releases the private data when it fails. */
582  if (ret < 0)
583  return ret;
584 
/* Propagate the kernel's alignment / padding needs to the buffers it reads
 * from (output of `input`) and writes to (output of the new pass) */
585  align_pass(input, comp->block_size, comp->over_read, p->pixel_bits_in);
586  align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
587  return 0;
588 
589 fail:
590  op_pass_free(p);
591  return ret;
592 }
593 
/**
 * Compile an operation list into one or more graph passes.
 *
 * Takes ownership of *pops: the list is always freed and *pops is set to
 * NULL before returning. An end-to-end no-op list adds no pass and simply
 * forwards `input` to *output. Otherwise the list is compiled as a single
 * pass; if that fails with ENOTSUP, it is split into sub-passes which are
 * compiled one after another, each feeding the next. On any failure the
 * graph is rolled back to the number of passes it had on entry.
 *
 * NOTE(review): the statements inside the SWS_OP_FLAG_OPTIMIZE branch and
 * the statement after the "No backend found" warning are not fully visible
 * in this listing.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
594 int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend,
595  SwsOpList **pops, int flags, SwsPass *input,
596  SwsPass **output)
597 {
/* Remembered so that partially added passes can be rolled back */
598  const int passes_orig = graph->num_passes;
599  SwsContext *ctx = graph->ctx;
600  SwsOpList *ops = *pops;
601  int ret = 0;
602 
603  /* Check if the whole operation graph is an end-to-end no-op */
604  if (ff_sws_op_list_is_noop(ops)) {
605  if (output)
606  *output = input;
607  goto out;
608  }
609 
610  const SwsOp *read = ff_sws_op_list_input(ops);
611  const SwsOp *write = ff_sws_op_list_output(ops);
612  if (!read || !write) {
613  av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
614  "and write, respectively.\n");
615  ret = AVERROR(EINVAL);
616  goto out;
617  }
618 
619  if (flags & SWS_OP_FLAG_OPTIMIZE) {
621  if (ret < 0)
622  goto out;
623  av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
625  }
626 
/* First attempt: the whole list as one pass. Anything other than "not
 * supported" (success or a hard error) ends the function here. */
627  ret = compile(graph, backend, ops, input, output);
628  if (ret != AVERROR(ENOTSUP))
629  goto out;
630 
631  av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
632  SwsPass *prev = input;
633  bool first = true;
634  while (ops) {
/* Split the leading sub-pass off `ops`; the remainder, if any, is returned
 * in `rest` */
635  SwsOpList *rest;
636  ret = ff_sws_op_list_subpass(ops, &rest);
637  if (ret < 0)
638  goto out;
639 
640  if (first && !rest) {
641  /* No point in compiling an unsplit pass again */
642  ret = AVERROR(ENOTSUP);
643  goto out;
644  }
645 
/* Chain the sub-pass after the previous one; in test-compile mode
 * (output == NULL) no pass is added and `prev` stays unchanged */
646  ret = compile(graph, backend, ops, prev, output ? &prev : NULL);
647  if (ret < 0) {
/* `ops` itself is released at the `out` label */
648  ff_sws_op_list_free(&rest);
649  goto out;
650  }
651 
652  ff_sws_op_list_free(&ops);
653  first = false;
654  ops = rest;
655  }
656 
657  if (output) {
658  /* Return last subpass successfully compiled */
659  av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
660  graph->num_passes - passes_orig);
661  *output = prev;
662  }
663 
664 out:
665  if (ret == AVERROR(ENOTSUP)) {
666  av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
668  }
/* Undo any passes added by a partially successful attempt */
669  if (ret < 0)
670  ff_sws_graph_rollback(graph, passes_orig);
/* The list is consumed regardless of the outcome */
671  ff_sws_op_list_free(&ops);
672  *pops = NULL;
673  return ret;
674 }
flags
const SwsFlags flags[]
Definition: swscale.c:72
SwsOpPass::tail_buf
uint8_t * tail_buf
Definition: ops_dispatch.c:53
copy_lines
static void copy_lines(uint8_t *dst, const size_t dst_stride, const uint8_t *src, const size_t src_stride, const int h, const size_t bytes)
Definition: ops_dispatch.c:346
AV_ROUND_UP
@ AV_ROUND_UP
Round toward +infinity.
Definition: mathematics.h:134
SwsOpPass::tail_buf_size
unsigned int tail_buf_size
Definition: ops_dispatch.c:54
rw_planes
static int rw_planes(const SwsOp *op)
Definition: ops_dispatch.c:455
ff_sws_op_list_free
void ff_sws_op_list_free(SwsOpList **p_ops)
Definition: ops.c:620
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:122
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsOpPass::idx_in
int idx_in[4]
Definition: ops_dispatch.c:45
SwsOpPass::tail_size_out
int tail_size_out
Definition: ops_dispatch.c:40
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:634
mem_internal.h
out
static FILE * out
Definition: movenc.c:55
SwsOpPass::exec_tail
SwsOpExec exec_tail
Definition: ops_dispatch.c:35
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_dispatch.h:42
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
AV_PIX_FMT_FLAG_FLOAT
#define AV_PIX_FMT_FLAG_FLOAT
The pixel format contains IEEE-754 floating point values.
Definition: pixdesc.h:158
ops.h
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_dispatch.h:57
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
AV_ROUND_ZERO
@ AV_ROUND_ZERO
Round toward zero.
Definition: mathematics.h:131
AVRounding
AVRounding
Rounding methods.
Definition: mathematics.h:130
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
base
uint8_t base
Definition: vp3data.h:128
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsFrame::width
int width
Dimensions and format.
Definition: format.h:218
mathematics.h
ops_dispatch.h
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpPass::tail_blocks
size_t tail_blocks
Definition: ops_dispatch.c:52
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:131
SwsOpPass::idx_out
int idx_out[4]
Definition: ops_dispatch.c:46
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
SwsPass::width
int width
Definition: graph.h:85
ff_sws_op_list_subpass
int ff_sws_op_list_subpass(SwsOpList *ops, SwsOpList **out_rest)
Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ operations.
Definition: ops_optimizer.c:948
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:296
ff_sws_op_list_print
void ff_sws_op_list_print(void *log, int lev, int lev_extra, const SwsOpList *ops)
Print out the contents of an operation list.
Definition: ops.c:961
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:45
SwsFrame::data
uint8_t * data[4]
Definition: format.h:212
fail
#define fail()
Definition: checkasm.h:225
SwsOpBackend::compile
int(* compile)(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
Compile an operation list to an implementation chain.
Definition: ops_dispatch.h:139
SwsOpBackend::hw_format
enum AVPixelFormat hw_format
If NONE, backend only supports software frames.
Definition: ops_dispatch.h:146
SwsOpPass::memcpy_last
bool memcpy_last
Definition: ops_dispatch.c:50
refstruct.h
get_row_data
static void get_row_data(const SwsOpPass *p, const int y_dst, const uint8_t *in[4], uint8_t *out[4])
Definition: ops_dispatch.c:135
safe_blocks_offset
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size, ptrdiff_t safe_offset, const int32_t *offset_bytes)
Definition: ops_dispatch.c:170
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:210
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
avassert.h
AV_LOG_TRACE
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
Definition: log.h:236
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
SwsFrame::format
enum AVPixelFormat format
Definition: format.h:219
SwsPass::priv
void * priv
Definition: graph.h:110
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
SwsGraph::num_passes
int num_passes
Definition: graph.h:132
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
AVPixFmtDescriptor::log2_chroma_w
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:80
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpPass::comp
SwsCompiledOp comp
Definition: ops_dispatch.c:33
SwsOpBackend
Definition: ops_dispatch.h:130
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
ff_sws_op_list_is_noop
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
Returns whether an op list represents a true no-op operation, i.e.
Definition: ops.c:719
op_pass_free
static void op_pass_free(void *ptr)
Definition: ops_dispatch.c:121
NULL
#define NULL
Definition: coverity.c:32
ff_sws_compiled_op_unref
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
Definition: ops_dispatch.c:113
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
av_fast_mallocz
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
Allocate and clear a buffer, reusing the given one if large enough.
Definition: mem.c:562
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
av_cpu_max_align
size_t av_cpu_max_align(void)
Get the maximum data alignment that may be required by FFmpeg.
Definition: cpu.c:287
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: ops_dispatch.c:460
compile
static int compile(SwsGraph *graph, const SwsOpBackend *backend, const SwsOpList *ops, SwsPass *input, SwsPass **output)
Definition: ops_dispatch.c:482
AVPixFmtDescriptor::flags
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:94
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SwsOpPass::filter_size_h
int filter_size_h
Definition: ops_dispatch.c:48
AV_ROUND_DOWN
@ AV_ROUND_DOWN
Round toward -infinity.
Definition: mathematics.h:133
SwsPass::height
int height
Definition: graph.h:85
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_dispatch.h:58
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
SwsFrame::height
int height
Definition: format.h:218
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
SwsOpExec::in_sub_x
uint8_t in_sub_x[4]
Definition: ops_dispatch.h:62
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
av_err2str
#define av_err2str(errnum)
Convenience macro, the return value should be used only directly in function arguments but never stand-alone.
Definition: error.h:122
size
int size
Definition: twinvq_data.h:10344
op_pass_setup
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, const SwsPass *pass)
Definition: ops_dispatch.c:180
SwsOpPass::offsets_y
int * offsets_y
Definition: ops_dispatch.c:47
SwsOpList::src
SwsFormat src
Definition: ops.h:293
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:341
compile_backend
static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Definition: ops_dispatch.c:57
SwsFormat
Definition: format.h:77
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:419
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e. increment the reference count of the underlying object and return obj.
Definition: refstruct.c:140
SwsPass::output
SwsPassBuffer * output
Filter output buffer.
Definition: graph.h:98
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
line
Definition: graph2dot.c:48
SWS_OP_FLAG_OPTIMIZE
@ SWS_OP_FLAG_OPTIMIZE
Definition: ops.h:372
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
SwsOpPass::planes_in
int planes_in
Definition: ops_dispatch.c:41
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are no more references to it.
Definition: refstruct.c:120
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more efficient alternatives.
Definition: ops_optimizer.c:350
SwsPassBuffer::width_align
int width_align
Definition: graph.h:66
SwsOpPass::pixel_bits_out
int pixel_bits_out
Definition: ops_dispatch.c:44
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
SwsOpPass::planes_out
int planes_out
Definition: ops_dispatch.c:42
AV_ROUND_INF
@ AV_ROUND_INF
Round away from zero.
Definition: mathematics.h:132
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
SwsOpPass::tail_size_in
int tail_size_in
Definition: ops_dispatch.c:39
av_assert1
#define av_assert1(cond)
assert() equivalent for checks that do not lie in speed-critical code.
Definition: avassert.h:58
DECLARE_ALIGNED_32
#define DECLARE_ALIGNED_32(t, v)
Definition: mem_internal.h:113
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ops_internal.h
SwsOpPass
Copyright (C) 2025 Niklas Haas.
Definition: ops_dispatch.c:32
pixel_bytes
static size_t pixel_bytes(size_t pixels, int pixel_bits, enum AVRounding rounding)
Definition: ops_dispatch.c:146
SwsOp
Definition: ops.h:238
SwsOpExec::out_sub_y
uint8_t out_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpExec::out_sub_x
uint8_t out_sub_x[4]
Definition: ops_dispatch.h:62
SwsOpPass::memcpy_first
bool memcpy_first
Definition: ops_dispatch.c:49
ff_sws_graph_add_pass
int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, int align, SwsPassFunc run, SwsPassSetup setup, void *priv, void(*free_cb)(void *priv), SwsPass **out_pass)
Allocate and add a new pass to the filter graph.
Definition: graph.c:175
ret
ret
Definition: filter_design.txt:187
SwsOpList::dst
SwsFormat dst
Definition: ops.h:293
SwsCompiledOp
Definition: ops_dispatch.h:100
SwsPassBuffer::width_pad
int width_pad
Definition: graph.h:67
SwsFormat::hw_format
enum AVPixelFormat hw_format
Definition: format.h:81
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
SwsOpPass::num_blocks
size_t num_blocks
Definition: ops_dispatch.c:36
safe_bytes_pad
static size_t safe_bytes_pad(int linesize, int plane_pad)
Definition: ops_dispatch.c:163
SwsOpPass::exec_base
SwsOpExec exec_base
Definition: ops_dispatch.c:34
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:594
SwsOpExec::in_sub_y
uint8_t in_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpPass::pixel_bits_in
int pixel_bits_in
Definition: ops_dispatch.c:43
SwsOpPass::tail_off_in
int tail_off_in
Definition: ops_dispatch.c:37
SwsOpPass::memcpy_out
bool memcpy_out
Definition: ops_dispatch.c:51
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:121
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
align_pass
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
Definition: ops_dispatch.c:469
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
op_pass_run
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, const int h, const SwsPass *pass)
Definition: ops_dispatch.c:357
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SwsPassBuffer
Represents an output buffer for a filter pass.
Definition: graph.h:59
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:296
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:288
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPass::tail_off_out
int tail_off_out
Definition: ops_dispatch.c:38
SwsFrame::linesize
int linesize[4]
Definition: format.h:213
AVPixFmtDescriptor::log2_chroma_h
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:89
src
#define src
Definition: vp8dsp.c:248
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
ff_sws_ops_compile
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend, or the best available backend if backend is NULL.
Definition: ops_dispatch.c:93
ff_sws_graph_rollback
void ff_sws_graph_rollback(SwsGraph *graph, int since_idx)
Remove all passes added since the given index.
Definition: graph.c:878