FFmpeg
ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025-2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <float.h>
22 
23 #include "libavutil/avassert.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/x86/cpu.h"
26 
27 #include "../ops_chain.h"
28 #include "../uops.h"
29 #include "../uops_macros.h"
30 
31 static int setup_rw_packed(const SwsImplParams *params, SwsImplResult *out)
32 {
33  const SwsUOp *uop = params->uop;
34 
35  /* 3-component packed reads/writes process one extra garbage word */
36  if (uop->mask == SWS_COMP_ELEMS(3)) {
37  switch (uop->uop) {
38  case SWS_UOP_READ_PACKED: out->over_read[0] = sizeof(uint32_t); break;
39  case SWS_UOP_WRITE_PACKED: out->over_write[0] = sizeof(uint32_t); break;
40  }
41  }
42 
43  return 0;
44 }
45 
46 static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
47 {
48  const SwsFilterWeights *filter = params->uop->data.kernel;
49  static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
50  ">8 byte pointers not supported");
51 
52  /* Pre-convert weights to float */
53  float *weights = av_calloc(filter->num_weights, sizeof(float));
54  if (!weights)
55  return AVERROR(ENOMEM);
56 
57  for (int i = 0; i < filter->num_weights; i++)
58  weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
59 
60  out->priv.ptr = weights;
61  out->priv.uptr[1] = filter->filter_size;
62  out->free = ff_op_priv_free;
63  return 0;
64 }
65 
66 static int hscale_sizeof_weight(const SwsUOp *uop)
67 {
68  switch (uop->type) {
69  case SWS_PIXEL_U8: return sizeof(int16_t);
70  case SWS_PIXEL_U16: return sizeof(int16_t);
71  case SWS_PIXEL_F32: return sizeof(float);
72  default: return 0;
73  }
74 }
75 
/**
 * Setup callback for the gather-based horizontal filter kernels.
 *
 * Allocates a weight buffer of line_size * aligned_size entries (int16_t for
 * U8/U16 pixels, float for F32 pixels), fills it with the filter weights in
 * the transposed layout described below, and publishes it through
 * out->priv.ptr, with the aligned tap count in out->priv.uptr[1] and
 * ff_op_priv_free as release callback. For every component present in the
 * uop mask, the padding taps are reported as over-read.
 *
 * Returns 0 on success, AVERROR(EINVAL) if the aligned tap count exceeds
 * INT_MAX, or AVERROR(ENOMEM) if the buffer cannot be allocated.
 */
76 static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
77 {
78  const SwsUOp *uop = params->uop;
79  const SwsFilterWeights *filter = uop->data.kernel;
80 
81  /**
82  * `vpgatherdd` gathers 32 bits at a time; so if we're filtering a smaller
83  * size, we need to gather 2/4 taps simultaneously and unroll the inner
84  * loop over several packed samples.
85  */
86  const int pixel_size = ff_sws_pixel_type_size(uop->type);
 /* samples per 32-bit word (assuming pixel_size is in bytes - TODO confirm) */
87  const int taps_align = sizeof(int32_t) / pixel_size;
88  const int filter_size = filter->filter_size;
89  const int block_size = params->table->block_size;
 /* taps per pixel, rounded up to a whole number of 32-bit words */
90  const size_t aligned_size = FFALIGN(filter_size, taps_align);
 /* output width, rounded up to a whole number of blocks */
91  const size_t line_size = FFALIGN(filter->dst_size, block_size);
92  av_assert1(FFALIGN(line_size, taps_align) == line_size);
93  if (aligned_size > INT_MAX)
94  return AVERROR(EINVAL);
95 
 /* One allocation, viewed as int16_t or float depending on the pixel type */
96  union {
97  void *ptr;
98  int16_t *i16;
99  float *f32;
100  } weights;
101 
102  const int sizeof_weight = hscale_sizeof_weight(uop);
 /* Entries the loops below never write (padding taps, pixels past dst_size)
  * keep their initial value - presumably zero, if av_calloc zero-fills like
  * calloc; TODO confirm */
103  weights.ptr = av_calloc(line_size, sizeof_weight * aligned_size);
104  if (!weights.ptr)
105  return AVERROR(ENOMEM);
106 
107  /**
108  * Transpose filter weights to group (aligned) taps by block
109  */
110  const int mmsize = block_size * 2;
111  const int gather_size = mmsize / sizeof(int32_t); /* pixels per vpgatherdd */
 /* x walks the line one block at a time; each block owns
  * block_size * aligned_size consecutive weights starting at x * aligned_size */
112  for (size_t x = 0; x < line_size; x += block_size) {
 /* the last block may be only partially covered by real output pixels */
113  const int elems = FFMIN(block_size, filter->dst_size - x);
114  for (int j = 0; j < filter_size; j++) {
 /* jb: first tap of the aligned tap group containing j; ji: offset in it */
115  const int jb = j & ~(taps_align - 1);
116  const int ji = j - jb;
117  const size_t idx_base = x * aligned_size + jb * block_size + ji;
118  for (int i = 0; i < elems; i++) {
119  const int w = filter->weights[(x + i) * filter_size + j];
120  size_t idx = idx_base;
121  if (uop->type == SWS_PIXEL_U8) {
122  /* Interleave the pixels within each lane, i.e.:
123  * [a0 a1 a2 a3 | b0 b1 b2 b3 ] pixels 0-1, taps 0-3 (lane 0)
124  * [e0 e1 e2 e3 | f0 f1 f2 f3 ] pixels 4-5, taps 0-3 (lane 1)
125  * [c0 c1 c2 c3 | d0 d1 d2 d3 ] pixels 2-3, taps 0-3 (lane 0)
126  * [g0 g1 g2 g3 | h0 h1 h2 h3 ] pixels 6-7, taps 0-3 (lane 1)
127  * [i0 i1 i2 i3 | j0 j1 j2 j3 ] pixels 8-9, taps 0-3 (lane 0)
128  * ...
129  * [o0 o1 o2 o3 | p0 p1 p2 p3 ] pixels 14-15, taps 0-3 (lane 1)
130  * (repeat for taps 4-7, etc.)
131  */
132  const int gather_base = i & ~(gather_size - 1);
133  const int gather_pos = i - gather_base;
134  const int lane_idx = gather_pos >> 2;
135  const int pos_in_lane = gather_pos & 3;
136  idx += gather_base * 4 /* which gather (m0 or m1) */
137  + (pos_in_lane >> 1) * (mmsize / 2) /* lo/hi unpack */
138  + lane_idx * 8 /* 8 ints per lane */
139  + (pos_in_lane & 1) * 4; /* 4 taps per pair */
140  } else {
 /* wider pixel types: pixels are stored in order, taps_align taps each */
141  idx += i * taps_align;
142  }
143 
 /* the integer weight is stored unscaled; for F32 it is only converted
  * to float */
144  switch (uop->type) {
145  case SWS_PIXEL_U8: weights.i16[idx] = w; break;
146  case SWS_PIXEL_U16: weights.i16[idx] = w; break;
147  case SWS_PIXEL_F32: weights.f32[idx] = w; break;
148  }
149  }
150  }
151  }
152 
153  out->priv.ptr = weights.ptr;
154  out->priv.uptr[1] = aligned_size;
155  out->free = ff_op_priv_free;
156 
 /* Report the bytes covered by the padding taps as over-read on every
  * component this uop reads */
157  for (int i = 0; i < 4; i++) {
158  if (uop->mask & SWS_COMP(i))
159  out->over_read[i] = (aligned_size - filter_size) * pixel_size;
160  }
161  return 0;
162 }
163 
164 static bool check_filter_h_4x4(const SwsImplParams *params)
165 {
166  SwsContext *ctx = params->ctx;
167  const SwsUOp *uop = params->uop;
168  if ((ctx->flags & SWS_BITEXACT) && uop->type == SWS_PIXEL_F32)
169  return false; /* different accumulation order due to 4x4 transpose */
170 
171  const int cpu_flags = av_get_cpu_flags();
173  return true; /* always prefer over gathers if gathers are slow */
174 
175  /**
176  * Otherwise, prefer it above a certain filter size. Empirically, this
177  * kernel seems to be faster whenever the reference/gather kernel crosses
178  * a breakpoint for the number of gathers needed, but this filter doesn't.
179  *
180  * Tested on a Lunar Lake (Intel Core Ultra 7 258V) system.
181  */
182  const SwsFilterWeights *filter = uop->data.kernel;
183  return uop->type == SWS_PIXEL_U8 && filter->filter_size > 12 ||
184  uop->type == SWS_PIXEL_U16 && filter->filter_size > 4 ||
185  uop->type == SWS_PIXEL_F32 && filter->filter_size > 1;
186 }
187 
189 {
190  const SwsUOp *uop = params->uop;
191  const SwsFilterWeights *filter = uop->data.kernel;
192  const int pixel_size = ff_sws_pixel_type_size(uop->type);
193  const int sizeof_weights = hscale_sizeof_weight(uop);
194  const int block_size = params->table->block_size;
195  const int taps_align = 16 / sizeof_weights; /* taps per iteration (XMM) */
196  const int pixels_align = 4; /* pixels per iteration */
197  const int filter_size = filter->filter_size;
198  const size_t aligned_size = FFALIGN(filter_size, taps_align);
199  const int line_size = FFALIGN(filter->dst_size, block_size);
200  av_assert1(FFALIGN(line_size, pixels_align) == line_size);
201 
202  union {
203  void *ptr;
204  int16_t *i16;
205  float *f32;
206  } weights;
207 
208  weights.ptr = av_calloc(line_size, aligned_size * sizeof_weights);
209  if (!weights.ptr)
210  return AVERROR(ENOMEM);
211 
212  /**
213  * Desired memory layout: [w][taps][pixels_align][taps_align]
214  *
215  * Example with taps_align=8, pixels_align=4:
216  * [a0, a1, ... a7] weights for pixel 0, taps 0..7
217  * [b0, b1, ... b7] weights for pixel 1, taps 0..7
218  * [c0, c1, ... c7] weights for pixel 2, taps 0..7
219  * [d0, d1, ... d7] weights for pixel 3, taps 0..7
220  * [a8, a9, ... a15] weights for pixel 0, taps 8..15
221  * ...
222  * repeat for all taps, then move on to pixels 4..7, etc.
223  */
224  for (int x = 0; x < filter->dst_size; x++) {
225  for (int j = 0; j < filter_size; j++) {
226  const int xb = x & ~(pixels_align - 1);
227  const int jb = j & ~(taps_align - 1);
228  const int xi = x - xb, ji = j - jb;
229  const int w = filter->weights[x * filter_size + j];
230  const int idx = xb * aligned_size + jb * pixels_align + xi * taps_align + ji;
231 
232  switch (uop->type) {
233  case SWS_PIXEL_U8: weights.i16[idx] = w; break;
234  case SWS_PIXEL_U16: weights.i16[idx] = w; break;
235  case SWS_PIXEL_F32: weights.f32[idx] = w; break;
236  }
237  }
238  }
239 
240  out->priv.ptr = weights.ptr;
241  out->priv.uptr[1] = aligned_size * sizeof_weights;
242  out->free = ff_op_priv_free;
243 
244  for (int i = 0; i < 4; i++) {
245  if (uop->mask & SWS_COMP(i))
246  out->over_read[i] = (aligned_size - filter_size) * pixel_size;
247  }
248  return 0;
249 }
250 
251 static int setup_scale(const SwsImplParams *params, SwsImplResult *out)
252 {
253  const SwsUOp *uop = params->uop;
254  switch (uop->type) {
255  case SWS_PIXEL_U8: out->priv.u16[0] = uop->data.scalar.u8; break; /* for pmullw */
256  case SWS_PIXEL_U16: out->priv.u16[0] = uop->data.scalar.u16; break;
257  case SWS_PIXEL_U32: out->priv.u32[0] = uop->data.scalar.u32; break;
258  case SWS_PIXEL_F32: out->priv.f32[0] = uop->data.scalar.f32; break;
259  default: return AVERROR(EINVAL);
260  }
261 
262  return 0;
263 }
264 
265 static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
266 {
267  const SwsUOp *uop = params->uop;
268  for (int i = 0; i < 4; i++)
269  out->priv.u32[i] = uop->data.vec4[i].u32;
270  return 0;
271 }
272 
273 static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
274 {
275  out->priv.ptr = av_refstruct_ref(params->uop->data.ptr);
276  out->free = ff_op_priv_unref;
277  return 0;
278 }
279 
280 static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
281 {
282  const SwsUOp *uop = params->uop;
283  out->priv.ptr = av_memdup(uop->data.mat4, sizeof(uop->data.mat4));
284  out->free = ff_op_priv_free;
285  return out->priv.ptr ? 0 : AVERROR(ENOMEM);
286 }
287 
288 static bool uop_is_type_invariant(const SwsUOpType uop)
289 {
290  switch (uop) {
291  case SWS_UOP_READ_PLANAR:
293  case SWS_UOP_CLEAR:
294  return true;
295  default:
296  return false;
297  }
298 }
299 
/* REF_ENTRY: expands to the address of the table entry uop_<NAME><EXT>,
 * followed by a comma, for use inside an entries initializer list. */
300 #define REF_ENTRY(EXT, NAME, ...) &uop_##NAME##EXT,
/* DECL_ENTRY: declares the external kernel symbol ff_<NAME><EXT> and defines
 * the matching static table entry uop_<NAME><EXT>, wiring up the kernel
 * function, the optional CHECK and SETUP callbacks, and any further
 * designated initializers passed through __VA_ARGS__. */
301 #define DECL_ENTRY(EXT, CHECK, SETUP, NAME, ...) \
302  void ff_##NAME##EXT(void); \
303  static const SwsUOpEntry uop_##NAME##EXT = { \
304  .func = (SwsFuncPtr) ff_##NAME##EXT, \
305  .check = CHECK, \
306  .setup = SETUP, \
307  __VA_ARGS__, \
308  };
309 
310 /* Define all UOPs except conversion ops and type-invariant ops */
/* Each line invokes DECL_ENTRY through SWS_FOR_STRUCT with (EXT, check
 * callback, setup callback); NULL means no such callback.
 * NOTE(review): SWS_FOR_STRUCT is defined outside this file - presumably it
 * expands once per variant of the given op for TYPE; confirm there.
 * Keep this list in sync with REF_OPS_COMMON. */
311 #define DECL_OPS_COMMON(EXT, TYPE) \
312 SWS_FOR_STRUCT(TYPE, READ_PACKED, DECL_ENTRY, EXT, NULL, setup_rw_packed) \
313 SWS_FOR_STRUCT(TYPE, READ_NIBBLE, DECL_ENTRY, EXT, NULL, NULL) \
314 SWS_FOR_STRUCT(TYPE, READ_BIT, DECL_ENTRY, EXT, NULL, NULL) \
315 SWS_FOR_STRUCT(TYPE, READ_PALETTE, DECL_ENTRY, EXT, NULL, NULL) \
316 SWS_FOR_STRUCT(TYPE, WRITE_PACKED, DECL_ENTRY, EXT, NULL, setup_rw_packed) \
317 SWS_FOR_STRUCT(TYPE, WRITE_NIBBLE, DECL_ENTRY, EXT, NULL, NULL) \
318 SWS_FOR_STRUCT(TYPE, WRITE_BIT, DECL_ENTRY, EXT, NULL, NULL) \
319 SWS_FOR_STRUCT(TYPE, SWAP_BYTES, DECL_ENTRY, EXT, NULL, NULL) \
320 SWS_FOR_STRUCT(TYPE, EXPAND_BIT, DECL_ENTRY, EXT, NULL, NULL) \
321 SWS_FOR_STRUCT(TYPE, MOVE, DECL_ENTRY, EXT, NULL, NULL) \
322 SWS_FOR_STRUCT(TYPE, SCALE, DECL_ENTRY, EXT, NULL, setup_scale) \
323 SWS_FOR_STRUCT(TYPE, ADD, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
324 SWS_FOR_STRUCT(TYPE, MIN, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
325 SWS_FOR_STRUCT(TYPE, MAX, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
326 SWS_FOR_STRUCT(TYPE, UNPACK, DECL_ENTRY, EXT, NULL, NULL) \
327 SWS_FOR_STRUCT(TYPE, PACK, DECL_ENTRY, EXT, NULL, NULL) \
328 SWS_FOR_STRUCT(TYPE, LSHIFT, DECL_ENTRY, EXT, NULL, NULL) \
329 SWS_FOR_STRUCT(TYPE, RSHIFT, DECL_ENTRY, EXT, NULL, NULL) \
330 SWS_FOR_STRUCT(TYPE, LINEAR_FMA, DECL_ENTRY, EXT, NULL, setup_linear) \
331 SWS_FOR_STRUCT(TYPE, DITHER, DECL_ENTRY, EXT, NULL, setup_dither) \
332 /* end of macro */
333 
/* Lists, via REF_ENTRY, the ops that DECL_OPS_COMMON declares, in the same
 * order, for use inside the entries initializer of a SwsUOpTable. Keep both
 * lists in sync.
 * NOTE(review): SWS_FOR is defined outside this file - presumably the
 * counterpart of SWS_FOR_STRUCT without the per-entry initializers; confirm. */
334 #define REF_OPS_COMMON(EXT, TYPE) \
335  SWS_FOR(TYPE, READ_PACKED, REF_ENTRY, EXT) \
336  SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY, EXT) \
337  SWS_FOR(TYPE, READ_BIT, REF_ENTRY, EXT) \
338  SWS_FOR(TYPE, READ_PALETTE, REF_ENTRY, EXT) \
339  SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY, EXT) \
340  SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY, EXT) \
341  SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY, EXT) \
342  SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY, EXT) \
343  SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY, EXT) \
344  SWS_FOR(TYPE, MOVE, REF_ENTRY, EXT) \
345  SWS_FOR(TYPE, SCALE, REF_ENTRY, EXT) \
346  SWS_FOR(TYPE, ADD, REF_ENTRY, EXT) \
347  SWS_FOR(TYPE, MIN, REF_ENTRY, EXT) \
348  SWS_FOR(TYPE, MAX, REF_ENTRY, EXT) \
349  SWS_FOR(TYPE, UNPACK, REF_ENTRY, EXT) \
350  SWS_FOR(TYPE, PACK, REF_ENTRY, EXT) \
351  SWS_FOR(TYPE, LSHIFT, REF_ENTRY, EXT) \
352  SWS_FOR(TYPE, RSHIFT, REF_ENTRY, EXT) \
353  SWS_FOR(TYPE, LINEAR_FMA, REF_ENTRY, EXT) \
354  SWS_FOR(TYPE, DITHER, REF_ENTRY, EXT) \
355  /* end of macro */
356 
/* Declares all U8 kernels for one EXT suffix and defines the table
 * uops_u8<EXT> with cpu_flags AV_CPU_FLAG_<FLAG> and block_size SIZE.
 * Besides the common ops, this is the only table carrying READ_PLANAR,
 * WRITE_PLANAR and CLEAR. The entries list is NULL-terminated. */
357 #define DECL_TABLE_U8(EXT, SIZE, FLAG) \
358 DECL_OPS_COMMON(EXT, U8) \
359 SWS_FOR_STRUCT(U8, READ_PLANAR, DECL_ENTRY, EXT, NULL, NULL) \
360 SWS_FOR_STRUCT(U8, WRITE_PLANAR, DECL_ENTRY, EXT, NULL, NULL) \
361 SWS_FOR_STRUCT(U8, CLEAR, DECL_ENTRY, EXT, NULL, setup_clear) \
362  \
363 static const SwsUOpTable uops_u8##EXT = { \
364  .cpu_flags = AV_CPU_FLAG_##FLAG, \
365  .block_size = SIZE, \
366  .entries = { \
367  REF_OPS_COMMON(EXT, U8) \
368  SWS_FOR(U8, READ_PLANAR, REF_ENTRY, EXT) \
369  SWS_FOR(U8, WRITE_PLANAR, REF_ENTRY, EXT) \
370  SWS_FOR(U8, CLEAR, REF_ENTRY, EXT) \
371  NULL \
372  }, \
373 };
374 
/* Declares all U16 kernels for one EXT suffix and defines the table
 * uops_u16<EXT> with cpu_flags AV_CPU_FLAG_<FLAG> and block_size SIZE.
 * Besides the common ops it carries the U8<->U16 conversions and
 * EXPAND_PAIR. The entries list is NULL-terminated. */
375 #define DECL_TABLE_U16(EXT, SIZE, FLAG) \
376 DECL_OPS_COMMON(EXT, U16) \
377 SWS_FOR_STRUCT(U8, TO_U16, DECL_ENTRY, EXT, NULL, NULL) \
378 SWS_FOR_STRUCT(U16, TO_U8, DECL_ENTRY, EXT, NULL, NULL) \
379 SWS_FOR_STRUCT(U8, EXPAND_PAIR, DECL_ENTRY, EXT, NULL, NULL) \
380  \
381 static const SwsUOpTable uops_u16##EXT = { \
382  .cpu_flags = AV_CPU_FLAG_##FLAG, \
383  .block_size = SIZE, \
384  .entries = { \
385  REF_OPS_COMMON(EXT, U16) \
386  SWS_FOR(U8, TO_U16, REF_ENTRY, EXT) \
387  SWS_FOR(U16, TO_U8, REF_ENTRY, EXT) \
388  SWS_FOR(U8, EXPAND_PAIR, REF_ENTRY, EXT) \
389  NULL \
390  }, \
391 };
392 
/* Declares all U32 kernels for one EXT suffix and defines the table
 * uops_u32<EXT> with cpu_flags AV_CPU_FLAG_<FLAG> and block_size SIZE.
 * Besides the common ops it carries the U8<->U32 and U16<->U32 conversions
 * and EXPAND_QUAD. The entries list is NULL-terminated. */
393 #define DECL_TABLE_U32(EXT, SIZE, FLAG) \
394 DECL_OPS_COMMON(EXT, U32) \
395 SWS_FOR_STRUCT(U8, TO_U32, DECL_ENTRY, EXT, NULL, NULL) \
396 SWS_FOR_STRUCT(U32, TO_U8, DECL_ENTRY, EXT, NULL, NULL) \
397 SWS_FOR_STRUCT(U16, TO_U32, DECL_ENTRY, EXT, NULL, NULL) \
398 SWS_FOR_STRUCT(U32, TO_U16, DECL_ENTRY, EXT, NULL, NULL) \
399 SWS_FOR_STRUCT(U8, EXPAND_QUAD, DECL_ENTRY, EXT, NULL, NULL) \
400  \
401 static const SwsUOpTable uops_u32##EXT = { \
402  .cpu_flags = AV_CPU_FLAG_##FLAG, \
403  .block_size = SIZE, \
404  .entries = { \
405  REF_OPS_COMMON(EXT, U32) \
406  SWS_FOR(U8, TO_U32, REF_ENTRY, EXT) \
407  SWS_FOR(U32, TO_U8, REF_ENTRY, EXT) \
408  SWS_FOR(U16, TO_U32, REF_ENTRY, EXT) \
409  SWS_FOR(U32, TO_U16, REF_ENTRY, EXT) \
410  SWS_FOR(U8, EXPAND_QUAD, REF_ENTRY, EXT) \
411  NULL \
412  }, \
413 };
414 
/* Declares all F32 kernels for one EXT suffix and defines the table
 * uops_f32<EXT> with cpu_flags AV_CPU_FLAG_<FLAG> and block_size SIZE.
 * Besides the common ops it carries the U8/U16<->F32 conversions and the
 * filtered planar reads (horizontal FH, vertical FV and FV_FMA) for U8, U16
 * and F32 input.
 *
 * Horizontal filtering exists in two flavours: the plain kernels set up by
 * setup_filter_h, and the _4x4 kernels set up by setup_filter_h_4x4 and
 * gated by check_filter_h_4x4. The _4x4 entries are listed first.
 * NOTE(review): presumably earlier entries win when several match, so the
 * _4x4 variant takes priority whenever its check passes - confirm against
 * the table lookup code. The entries list is NULL-terminated. */
415 #define DECL_TABLE_F32(EXT, SIZE, FLAG) \
416 DECL_OPS_COMMON(EXT, F32) \
417 SWS_FOR_STRUCT(U8, TO_F32, DECL_ENTRY, EXT, NULL, NULL) \
418 SWS_FOR_STRUCT(F32, TO_U8, DECL_ENTRY, EXT, NULL, NULL) \
419 SWS_FOR_STRUCT(U16, TO_F32, DECL_ENTRY, EXT, NULL, NULL) \
420 SWS_FOR_STRUCT(F32, TO_U16, DECL_ENTRY, EXT, NULL, NULL) \
421 SWS_FOR_STRUCT(U8, READ_PLANAR_FH, DECL_ENTRY, EXT, NULL, setup_filter_h) \
422 SWS_FOR_STRUCT(U16, READ_PLANAR_FH, DECL_ENTRY, EXT, NULL, setup_filter_h) \
423 SWS_FOR_STRUCT(F32, READ_PLANAR_FH, DECL_ENTRY, EXT, NULL, setup_filter_h) \
424 SWS_FOR_STRUCT(U8, READ_PLANAR_FH, DECL_ENTRY, _4x4##EXT, \
425  check_filter_h_4x4, setup_filter_h_4x4) \
426 SWS_FOR_STRUCT(U16, READ_PLANAR_FH, DECL_ENTRY, _4x4##EXT, \
427  check_filter_h_4x4, setup_filter_h_4x4) \
428 SWS_FOR_STRUCT(F32, READ_PLANAR_FH, DECL_ENTRY, _4x4##EXT, \
429  check_filter_h_4x4, setup_filter_h_4x4) \
430 SWS_FOR_STRUCT(U8, READ_PLANAR_FV, DECL_ENTRY, EXT, NULL, setup_filter_v) \
431 SWS_FOR_STRUCT(U16, READ_PLANAR_FV, DECL_ENTRY, EXT, NULL, setup_filter_v) \
432 SWS_FOR_STRUCT(F32, READ_PLANAR_FV, DECL_ENTRY, EXT, NULL, setup_filter_v) \
433 SWS_FOR_STRUCT(U8, READ_PLANAR_FV_FMA, DECL_ENTRY, EXT, NULL, setup_filter_v) \
434 SWS_FOR_STRUCT(U16, READ_PLANAR_FV_FMA, DECL_ENTRY, EXT, NULL, setup_filter_v) \
435 SWS_FOR_STRUCT(F32, READ_PLANAR_FV_FMA, DECL_ENTRY, EXT, NULL, setup_filter_v) \
436  \
437 static const SwsUOpTable uops_f32##EXT = { \
438  .cpu_flags = AV_CPU_FLAG_##FLAG, \
439  .block_size = SIZE, \
440  .entries = { \
441  REF_OPS_COMMON(EXT, F32) \
442  SWS_FOR(U8, TO_F32, REF_ENTRY, EXT) \
443  SWS_FOR(F32, TO_U8, REF_ENTRY, EXT) \
444  SWS_FOR(U16, TO_F32, REF_ENTRY, EXT) \
445  SWS_FOR(F32, TO_U16, REF_ENTRY, EXT) \
446  SWS_FOR(U8, READ_PLANAR_FH, REF_ENTRY, _4x4##EXT) \
447  SWS_FOR(U16, READ_PLANAR_FH, REF_ENTRY, _4x4##EXT) \
448  SWS_FOR(F32, READ_PLANAR_FH, REF_ENTRY, _4x4##EXT) \
449  SWS_FOR(U8, READ_PLANAR_FH, REF_ENTRY, EXT) \
450  SWS_FOR(U16, READ_PLANAR_FH, REF_ENTRY, EXT) \
451  SWS_FOR(F32, READ_PLANAR_FH, REF_ENTRY, EXT) \
452  SWS_FOR(U8, READ_PLANAR_FV, REF_ENTRY, EXT) \
453  SWS_FOR(U16, READ_PLANAR_FV, REF_ENTRY, EXT) \
454  SWS_FOR(F32, READ_PLANAR_FV, REF_ENTRY, EXT) \
455  SWS_FOR(U8, READ_PLANAR_FV_FMA, REF_ENTRY, EXT) \
456  SWS_FOR(U16, READ_PLANAR_FV_FMA, REF_ENTRY, EXT) \
457  SWS_FOR(F32, READ_PLANAR_FV_FMA, REF_ENTRY, EXT) \
458  NULL \
459  }, \
460 };
461 
/* Instantiate the kernel tables: (symbol suffix, block size in pixels,
 * required CPU flag). U8 exists for SSE4 and AVX2; the wider pixel types are
 * AVX2-only.
 * NOTE(review): the _m1/_m2 suffix presumably denotes how many vector
 * registers hold one block of a component - confirm against the assembly. */
462 DECL_TABLE_U8( _m1_sse4, 16, SSE4)
463 DECL_TABLE_U8( _m1_avx2, 32, AVX2)
464 DECL_TABLE_U8( _m2_sse4, 32, SSE4)
465 DECL_TABLE_U8( _m2_avx2, 64, AVX2)
466 DECL_TABLE_U16(_m1_avx2, 16, AVX2)
467 DECL_TABLE_U16(_m2_avx2, 32, AVX2)
468 DECL_TABLE_U32(_m2_avx2, 16, AVX2)
469 DECL_TABLE_F32(_m2_avx2, 16, AVX2)
470 
/* All kernel tables of this backend. The order is significant (see the note
 * on uops_u8_m1_avx2).
 * NOTE(review): presumably the tables are searched front to back by the
 * per-uop lookup used in compile() - confirm. */
471 static const SwsUOpTable *const tables[] = {
472  &uops_u8_m1_sse4,
473  &uops_u8_m1_avx2, /* order before _m2_sse4 */
474  &uops_u8_m2_sse4,
475  &uops_u8_m2_avx2,
476  &uops_u16_m1_avx2,
477  &uops_u16_m2_avx2,
478  &uops_u32_m2_avx2,
479  &uops_f32_m2_avx2,
480 };
481 
/* Entry points for processing 1 to 4 planes; compile() picks one according
 * to the larger of the read and write plane counts. They are only declared
 * here and defined elsewhere (presumably in assembly - TODO confirm). */
482 SWS_DECL_FUNC(ff_sws_process1_x86);
483 SWS_DECL_FUNC(ff_sws_process2_x86);
484 SWS_DECL_FUNC(ff_sws_process3_x86);
485 SWS_DECL_FUNC(ff_sws_process4_x86);
486 
/**
 * Number of bytes actually transferred by the move instruction used for a
 * payload of `bytes` bytes: 4 (movd) for up to 4 bytes, 8 (movq) for up to
 * 8 bytes, and a full vector of `mmsize` bytes (movu) for anything larger.
 */
static int movsize(const int bytes, const int mmsize)
{
    if (bytes <= 4)
        return 4;       /* movd */
    if (bytes <= 8)
        return 8;       /* movq */
    return mmsize;      /* movu */
}
493 
494 static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
495 {
496  uint8_t shuffle[16];
497  int read_bytes, write_bytes;
498  int pixels;
499 
500  /* Solve the shuffle mask for one 128-bit lane only */
501  pixels = ff_sws_solve_shuffle(ops, shuffle, 16, 0x80, &read_bytes, &write_bytes);
502  if (pixels < 0)
503  return pixels;
504 
505  /* We can't shuffle across lanes, so restrict the vector size to XMM
506  * whenever the read/write size would be a subset of the full vector */
507  if (read_bytes < 16 || write_bytes < 16)
508  mmsize = 16;
509 
510  const int num_lanes = mmsize / 16;
511  const int in_total = num_lanes * read_bytes;
512  const int out_total = num_lanes * write_bytes;
513 
514  *out = (SwsCompiledOp) {
515  .priv = av_memdup(shuffle, sizeof(shuffle)),
516  .free = av_free,
517  .slice_align = 1,
518  .block_size = pixels * num_lanes,
519  .over_read = { movsize(in_total, mmsize) - in_total },
520  .over_write = { movsize(out_total, mmsize) - out_total },
521  .cpu_flags = mmsize > 32 ? AV_CPU_FLAG_AVX512 :
522  mmsize > 16 ? AV_CPU_FLAG_AVX2 :
524  };
525 
526  if (!out->priv)
527  return AVERROR(ENOMEM);
528 
529 #define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT) \
530 do { \
531  SWS_DECL_FUNC(ff_packed_shuffle##IN##_##OUT##_##EXT); \
532  if (in_total == IN && out_total == OUT) \
533  out->func = ff_packed_shuffle##IN##_##OUT##_##EXT; \
534 } while (0)
535 
536  ASSIGN_SHUFFLE_FUNC( 5, 15, sse4);
537  ASSIGN_SHUFFLE_FUNC( 4, 16, sse4);
538  ASSIGN_SHUFFLE_FUNC( 2, 12, sse4);
539  ASSIGN_SHUFFLE_FUNC(16, 8, sse4);
540  ASSIGN_SHUFFLE_FUNC(10, 15, sse4);
541  ASSIGN_SHUFFLE_FUNC( 8, 16, sse4);
542  ASSIGN_SHUFFLE_FUNC( 4, 12, sse4);
543  ASSIGN_SHUFFLE_FUNC(15, 5, sse4);
544  ASSIGN_SHUFFLE_FUNC(15, 15, sse4);
545  ASSIGN_SHUFFLE_FUNC(12, 16, sse4);
546  ASSIGN_SHUFFLE_FUNC( 6, 12, sse4);
547  ASSIGN_SHUFFLE_FUNC(16, 4, sse4);
548  ASSIGN_SHUFFLE_FUNC(16, 12, sse4);
549  ASSIGN_SHUFFLE_FUNC(16, 16, sse4);
550  ASSIGN_SHUFFLE_FUNC( 8, 12, sse4);
551  ASSIGN_SHUFFLE_FUNC(12, 12, sse4);
552  ASSIGN_SHUFFLE_FUNC(32, 32, avx2);
553  ASSIGN_SHUFFLE_FUNC(64, 64, avx512);
554  av_assert1(out->func);
555  return 0;
556 }
557 
558 /* Expand pixel value to 32-bits by repeating as necessary */
559 static uint32_t expand32(const SwsPixelType type, const SwsPixel value)
560 {
561  switch (type) {
562  case SWS_PIXEL_U8: return value.u8 * 0x01010101u;
563  case SWS_PIXEL_U16: return value.u16 * 0x00010001u;
564  case SWS_PIXEL_U32: return value.u32;
565  case SWS_PIXEL_F32: return value.u32; /* reinterpret */
566  default: return 0;
567  }
568 }
569 
570 static void normalize_clear(SwsUOp *uop)
571 {
572  for (int i = 0; i < 4; i++)
573  uop->data.vec4[i].u32 = expand32(uop->type, uop->data.vec4[i]);
574 }
575 
/**
 * Compile callback of the x86 backend: build an implementation of `ops`
 * into `out`.
 *
 * Picks the widest vector size supported by the CPU, first tries the packed
 * shuffle fast path (solve_shuffle), and otherwise translates the op list
 * into micro-ops and compiles them one by one into an op chain that ends up
 * owned by out->priv (released through ff_sws_op_chain_free_cb).
 *
 * Returns 0 on success or a negative AVERROR code: ENOTSUP if not even SSE4
 * is available, ENOMEM on allocation failure, or whatever the helpers
 * report.
 *
 * NOTE(review): several statements appear to be absent from this function
 * as it stands here (first CPU check, allocation of `chain` and `uops`,
 * definition of `flags`, head of the per-uop compile call). They are flagged
 * inline; restore them from the upstream source.
 */
576 static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
577 {
578  const int cpu_flags = av_get_cpu_flags();
579  int ret, mmsize;
 /* NOTE(review): the condition guarding the next assignment is not present -
  * presumably an AVX-512 check, given the else-if chain that follows */
581  mmsize = 64;
582  else if (EXTERNAL_AVX2(cpu_flags))
583  mmsize = 32;
584  else if (EXTERNAL_SSE4(cpu_flags))
585  mmsize = 16;
586  else
587  return AVERROR(ENOTSUP);
588 
589  /* Special fast path for in-place packed shuffle */
590  ret = solve_shuffle(ops, mmsize, out);
 /* anything but ENOTSUP is final: success, or a real error */
591  if (ret != AVERROR(ENOTSUP))
592  return ret;
593 
 /* NOTE(review): the declaration/allocation of `chain` is not present */
595  if (!chain)
596  return AVERROR(ENOMEM);
597 
 /* NOTE(review): the declaration/allocation of `uops` is not present */
599  if (!uops) {
600  ret = AVERROR(ENOMEM);
601  goto fail;
602  }
603 
 /* NOTE(review): the definition of `flags` is not present */
607 
608  ret = ff_sws_ops_translate(ctx, ops, flags, uops);
609  if (ret < 0)
610  goto fail;
611 
612  *out = (SwsCompiledOp) {
613  /* Use at most two full YMM regs during the widest precision section */
614  .block_size = 2 * FFMIN(mmsize, 32) / ff_sws_op_list_max_size(ops),
615  .slice_align = 1,
616  .free = ff_sws_op_chain_free_cb,
617  .priv = chain,
618  };
619 
620  for (int i = 0; i < uops->num_ops; i++) {
621  SwsUOp *uop = &uops->ops[i];
622  int op_block_size = out->block_size;
623 
 /* Type-invariant ops are retyped to U8, with the block size scaled by the
  * original pixel size; CLEAR values are first widened to 32-bit patterns */
624  if (uop_is_type_invariant(uop->uop)) {
625  if (uop->uop == SWS_UOP_CLEAR)
626  normalize_clear(uop);
627  op_block_size *= ff_sws_pixel_type_size(uop->type);
628  uop->type = SWS_PIXEL_U8;
629  }
630 
 /* NOTE(review): the head of the call producing `ret` is not present; only
  * its last two arguments remain on the next line */
632  op_block_size, chain);
633  if (ret < 0)
634  goto fail;
635  }
636 
 /* Pick the process function by the larger of the read/write plane counts.
  * NOTE(review): `read` is NULL-checked but `write` is not - confirm that an
  * op list always has an output op. out->func stays unset if the plane
  * count is outside 1..4. */
637  const SwsOp *read = ff_sws_op_list_input(ops);
638  const SwsOp *write = ff_sws_op_list_output(ops);
639  const int read_planes = read ? ff_sws_rw_op_planes(read) : 0;
640  const int write_planes = ff_sws_rw_op_planes(write);
641  switch (FFMAX(read_planes, write_planes)) {
642  case 1: out->func = ff_sws_process1_x86; break;
643  case 2: out->func = ff_sws_process2_x86; break;
644  case 3: out->func = ff_sws_process3_x86; break;
645  case 4: out->func = ff_sws_process4_x86; break;
646  }
647 
 /* NOTE(review): as far as the visible code shows, ret cannot be negative
  * here (every failure above jumps to fail); this branch would also skip
  * freeing `uops` - confirm whether it can be dropped */
648  if (ret < 0) {
649  ff_sws_op_chain_free(chain);
650  return ret;
651  }
652 
 /* Propagate the requirements accumulated in the chain; the micro-op list
  * is no longer needed, while the chain stays alive through out->priv */
653  out->cpu_flags = chain->cpu_flags;
654  memcpy(out->over_read, chain->over_read, sizeof(out->over_read));
655  memcpy(out->over_write, chain->over_write, sizeof(out->over_write));
656  ff_sws_uop_list_free(&uops);
657  return 0;
658 
659 fail:
660  ff_sws_uop_list_free(&uops);
661  ff_sws_op_chain_free(chain);
662  return ret;
663 }
664 
666  .name = "x86",
667  .flags = SWS_BACKEND_X86,
668  .compile = compile,
669  .hw_format = AV_PIX_FMT_NONE,
670 };
hscale_sizeof_weight
static int hscale_sizeof_weight(const SwsUOp *uop)
Definition: ops.c:66
flags
const SwsFlags flags[]
Definition: swscale.c:85
ff_sws_rw_op_planes
int ff_sws_rw_op_planes(const SwsOp *op)
Return the number of planes involved in a read/write operation.
Definition: ops.c:170
expand32
static uint32_t expand32(const SwsPixelType type, const SwsPixel value)
Definition: ops.c:559
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsOpChain::over_read
int over_read[4]
Definition: ops_chain.h:90
out
static FILE * out
Definition: movenc.c:55
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:696
ff_sws_op_list_max_size
int ff_sws_op_list_max_size(const SwsOpList *ops)
Returns the size of the largest pixel type used in ops.
Definition: ops.c:774
backend_x86
const SwsOpBackend backend_x86
Definition: ops.c:665
normalize_clear
static void normalize_clear(SwsUOp *uop)
Definition: ops.c:570
EXTERNAL_FMA3
#define EXTERNAL_FMA3(flags)
Definition: cpu.h:68
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:85
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:180
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
float.h
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsUOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:154
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:149
setup_linear
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:280
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
DECL_TABLE_F32
#define DECL_TABLE_F32(EXT, SIZE, FLAG)
Definition: ops.c:415
SWS_BACKEND_X86
@ SWS_BACKEND_X86
Chained x86 SIMD kernels.
Definition: swscale.h:118
av_memdup
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
Definition: mem.c:304
AV_CPU_FLAG_AVX512
#define AV_CPU_FLAG_AVX512
AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used.
Definition: cpu.h:60
setup_dither
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:273
SwsImplParams::table
const SwsUOpTable * table
Definition: ops_chain.h:106
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
SWS_COMP_ELEMS
#define SWS_COMP_ELEMS(N)
Definition: uops.h:73
DECL_TABLE_U8
#define DECL_TABLE_U8(EXT, SIZE, FLAG)
Definition: ops.c:357
avassert.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
SwsUOpTable::block_size
int block_size
Definition: ops_chain.h:156
SwsUOp::kernel
SwsFilterWeights * kernel
Definition: uops.h:224
float
float
Definition: af_crystalizer.c:122
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
SwsUOp::uop
SwsUOpType uop
Definition: uops.h:218
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1465
SWS_UOP_WRITE_PLANAR
@ SWS_UOP_WRITE_PLANAR
Definition: uops.h:114
setup_clear
static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:265
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SWS_UOP_READ_PACKED
@ SWS_UOP_READ_PACKED
Definition: uops.h:109
EXTERNAL_AVX2
#define EXTERNAL_AVX2(flags)
Definition: cpu.h:72
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:705
SwsPixel::f32
float f32
Definition: uops.h:57
SwsOpBackend
Definition: ops_dispatch.h:133
DECL_TABLE_U16
#define DECL_TABLE_U16(EXT, SIZE, FLAG)
Definition: ops.c:375
fail
#define fail
Definition: test.h:478
SwsPixel::u8
uint8_t u8
Definition: uops.h:54
compile
static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: ops.c:576
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
SwsUOp::mat4
SwsPixel mat4[4][5]
Definition: uops.h:228
AV_CPU_FLAG_AVX2
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
Definition: cpu.h:56
solve_shuffle
static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
Definition: ops.c:494
SWS_UOP_FLAG_MOVE
@ SWS_UOP_FLAG_MOVE
Definition: uops.h:98
uop_is_type_invariant
static bool uop_is_type_invariant(const SwsUOpType uop)
Definition: ops.c:288
check_filter_h_4x4
static bool check_filter_h_4x4(const SwsImplParams *params)
Definition: ops.c:164
DECL_TABLE_U32
#define DECL_TABLE_U32(EXT, SIZE, FLAG)
Definition: ops.c:393
SwsPixelType
SwsPixelType
Definition: uops.h:38
SwsImplParams
Definition: ops_chain.h:105
SwsUOp::data
union SwsUOp::@586 data
EXTERNAL_AVX512
#define EXTERNAL_AVX512(flags)
Definition: cpu.h:77
SwsPixel::u16
uint16_t u16
Definition: uops.h:55
SwsUOp
Definition: uops.h:215
SWS_DECL_FUNC
SWS_DECL_FUNC(ff_sws_process1_x86)
tables
static const SwsUOpTable *const tables[]
Definition: ops.c:471
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
movsize
static int movsize(const int bytes, const int mmsize)
Definition: ops.c:487
SwsUOp::mask
SwsCompMask mask
Definition: uops.h:219
SWS_COMP
#define SWS_COMP(X)
Definition: uops.h:70
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
cpu_flags
CheckasmCpu cpu_flags
Definition: checkasm.c:84
SwsPixel
Definition: uops.h:51
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:62
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
setup_filter_h_4x4
static int setup_filter_h_4x4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:188
SwsImplParams::ctx
SwsContext * ctx
Definition: ops_chain.h:111
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
SWS_UOP_READ_PLANAR
@ SWS_UOP_READ_PLANAR
Definition: uops.h:105
weights
static const int weights[]
Definition: hevc_pel.c:32
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: uops.h:40
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
SwsUOpType
SwsUOpType
Definition: uops.h:101
SwsOpChain::over_write
int over_write[4]
Definition: ops_chain.h:91
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
SwsUOp::scalar
SwsPixel scalar
Definition: uops.h:226
SwsOp
Definition: ops.h:230
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:144
SwsUOp::type
SwsPixelType type
Definition: uops.h:217
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:863
ret
ret
Definition: filter_design.txt:187
SwsUOpList::num_ops
int num_ops
Definition: uops.h:251
SwsCompiledOp
Definition: ops_dispatch.h:100
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **p_ops)
Definition: uops.c:368
SwsUOp::ptr
SwsPixel * ptr
Definition: uops.h:225
ASSIGN_SHUFFLE_FUNC
#define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT)
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
setup_rw_packed
static int setup_rw_packed(const SwsImplParams *params, SwsImplResult *out)
Copyright (C) 2025-2026 Niklas Haas.
Definition: ops.c:31
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:873
SwsUOpList
Definition: uops.h:249
SwsUOp::vec4
SwsPixel vec4[4]
Definition: uops.h:227
setup_scale
static int setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:251
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
SWS_UOP_WRITE_PACKED
@ SWS_UOP_WRITE_PACKED
Definition: uops.h:115
mem.h
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:62
cpu.h
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
w
uint8_t w
Definition: llvidencdsp.c:39
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
SwsImplParams::uop
const SwsUOp * uop
Definition: ops_chain.h:108
SwsUOpFlags
uint32_t SwsUOpFlags
Definition: uops.h:94
setup_filter_v
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:46
int32_t
int32_t
Definition: audioconvert.c:56
ff_sws_uop_lookup
int ff_sws_uop_lookup(SwsContext *ctx, const SwsUOpTable *const tables[], int num_tables, const SwsUOp *uop, const int block_size, SwsOpChain *chain)
"Compile" a single uop by looking it up in a list of fixed size uop tables, in decreasing order of pr...
Definition: ops_chain.c:116
SWS_UOP_FLAG_FMA
@ SWS_UOP_FLAG_FMA
Definition: uops.h:97
SWS_UOP_CLEAR
@ SWS_UOP_CLEAR
Definition: uops.h:145
xi
#define xi(width, name, var, range_min, range_max, subs,...)
Definition: cbs_h264.c:190
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:285
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
SwsPixel::u32
uint32_t u32
Definition: uops.h:56
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
SwsImplResult
Definition: ops_chain.h:114
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:250
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
AV_CPU_FLAG_SSE4
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
Definition: cpu.h:47
setup_filter_h
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:76