FFmpeg
ops_tmpl_common.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "ops_backend.h"
22 
23 #ifndef BIT_DEPTH
24 # error Should only be included from ops_tmpl_*.c!
25 #endif
26 
/*
 * Define and register the kernel that converts a block of the current pixel
 * type into unsigned N-bit integers. N is pasted into the destination block
 * type (uNblock_t), the kernel name and the target pixel enum.
 *
 * Only components whose mask flag (X/Y/Z/W) is set are converted; the
 * converted blocks are then handed on through CONTINUE().
 */
#define WRAP_CONVERT_UINT(N)                                                    \
DECL_PATTERN(convert_uint##N)                                                   \
{                                                                               \
    /* One destination block per component */                                   \
    u##N##block_t dst_x, dst_y, dst_z, dst_w;                                   \
                                                                                \
    SWS_LOOP                                                                    \
    for (int n = 0; n < SWS_BLOCK_SIZE; n++) {                                  \
        if (X) {                                                                \
            dst_x[n] = x[n];                                                    \
        }                                                                       \
        if (Y) {                                                                \
            dst_y[n] = y[n];                                                    \
        }                                                                       \
        if (Z) {                                                                \
            dst_z[n] = z[n];                                                    \
        }                                                                       \
        if (W) {                                                                \
            dst_w[n] = w[n];                                                    \
        }                                                                       \
    }                                                                           \
                                                                                \
    CONTINUE(dst_x, dst_y, dst_z, dst_w);                                       \
}                                                                               \
                                                                                \
WRAP_COMMON_PATTERNS(convert_uint##N,                                           \
    .op         = SWS_OP_CONVERT,                                               \
    .convert.to = SWS_PIXEL_U##N,                                               \
);
51 
52 #if BIT_DEPTH != 8
54 #endif
55 
56 #if BIT_DEPTH != 16
58 #endif
59 
60 #if BIT_DEPTH != 32 || defined(IS_FLOAT)
62 #endif
63 
65 {
66  SWS_LOOP
67  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
68  if (!X)
69  x[i] = impl->priv.px[0];
70  if (!Y)
71  y[i] = impl->priv.px[1];
72  if (!Z)
73  z[i] = impl->priv.px[2];
74  if (!W)
75  w[i] = impl->priv.px[3];
76  }
77 
78  CONTINUE(x, y, z, w);
79 }
80 
81 #define WRAP_CLEAR(X, Y, Z, W) \
82 DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \
83  \
84 DECL_ENTRY(clear##_##X##Y##Z##W, \
85  .setup = ff_sws_setup_clear, \
86  .op = SWS_OP_CLEAR, \
87  .flexible = true, \
88  .unused = { !X, !Y, !Z, !W }, \
89 );
90 
91 WRAP_CLEAR(1, 1, 1, 0) /* rgba alpha */
92 WRAP_CLEAR(0, 1, 1, 1) /* argb alpha */
93 WRAP_CLEAR(1, 0, 1, 1) /* ya alpha */
94 
95 WRAP_CLEAR(0, 0, 1, 1) /* vuya chroma */
96 WRAP_CLEAR(1, 0, 0, 1) /* yuva chroma */
97 WRAP_CLEAR(1, 1, 0, 0) /* ayuv chroma */
98 WRAP_CLEAR(0, 1, 0, 1) /* uyva chroma */
99 WRAP_CLEAR(1, 0, 1, 0) /* xvyu chroma */
100 
101 WRAP_CLEAR(1, 0, 0, 0) /* gray -> yuva */
102 WRAP_CLEAR(0, 1, 0, 0) /* gray -> ayuv */
103 WRAP_CLEAR(0, 0, 1, 0) /* gray -> vuya */
104 
106 {
107  SWS_LOOP
108  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
109  if (X)
110  x[i] = FFMIN(x[i], impl->priv.px[0]);
111  if (Y)
112  y[i] = FFMIN(y[i], impl->priv.px[1]);
113  if (Z)
114  z[i] = FFMIN(z[i], impl->priv.px[2]);
115  if (W)
116  w[i] = FFMIN(w[i], impl->priv.px[3]);
117  }
118 
119  CONTINUE(x, y, z, w);
120 }
121 
123 {
124  SWS_LOOP
125  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
126  if (X)
127  x[i] = FFMAX(x[i], impl->priv.px[0]);
128  if (Y)
129  y[i] = FFMAX(y[i], impl->priv.px[1]);
130  if (Z)
131  z[i] = FFMAX(z[i], impl->priv.px[2]);
132  if (W)
133  w[i] = FFMAX(w[i], impl->priv.px[3]);
134  }
135 
136  CONTINUE(x, y, z, w);
137 }
138 
140  .op = SWS_OP_MIN,
141  .setup = ff_sws_setup_clamp,
142  .flexible = true,
143 );
144 
146  .op = SWS_OP_MAX,
147  .setup = ff_sws_setup_clamp,
148  .flexible = true,
149 );
150 
152 {
153  const pixel_t scale = impl->priv.px[0];
154 
155  SWS_LOOP
156  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
157  if (X)
158  x[i] *= scale;
159  if (Y)
160  y[i] *= scale;
161  if (Z)
162  z[i] *= scale;
163  if (W)
164  w[i] *= scale;
165  }
166 
167  CONTINUE(x, y, z, w);
168 }
169 
171  .op = SWS_OP_SCALE,
172  .setup = ff_sws_setup_scale,
173  .flexible = true,
174 );
175 
177 {
178  const SwsFilterWeights *filter = params->op->rw.kernel;
179  static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
180  ">8 byte pointers not supported");
181 
182  /* Pre-convert weights to float */
183  float *weights = av_calloc(filter->num_weights, sizeof(float));
184  if (!weights)
185  return AVERROR(ENOMEM);
186 
187  for (int i = 0; i < filter->num_weights; i++)
188  weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
189 
190  out->priv.ptr = weights;
191  out->priv.i32[2] = filter->filter_size;
192  out->free = ff_op_priv_free;
193  return 0;
194 }
195 
196 /* Fully general vertical planar filter case */
197 DECL_READ(filter_v, const int elems)
198 {
199  const SwsOpExec *exec = iter->exec;
200  const float *restrict weights = impl->priv.ptr;
201  const int filter_size = impl->priv.i32[2];
202  weights += filter_size * iter->y;
203 
204  f32block_t xs, ys, zs, ws;
205  memset(xs, 0, sizeof(xs));
206  if (elems > 1)
207  memset(ys, 0, sizeof(ys));
208  if (elems > 2)
209  memset(zs, 0, sizeof(zs));
210  if (elems > 3)
211  memset(ws, 0, sizeof(ws));
212 
213  for (int j = 0; j < filter_size; j++) {
214  const float weight = weights[j];
215 
216  SWS_LOOP
217  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
218  xs[i] += weight * in0[i];
219  if (elems > 1)
220  ys[i] += weight * in1[i];
221  if (elems > 2)
222  zs[i] += weight * in2[i];
223  if (elems > 3)
224  ws[i] += weight * in3[i];
225  }
226 
227  in0 = bump_ptr(in0, exec->in_stride[0]);
228  if (elems > 1)
229  in1 = bump_ptr(in1, exec->in_stride[1]);
230  if (elems > 2)
231  in2 = bump_ptr(in2, exec->in_stride[2]);
232  if (elems > 3)
233  in3 = bump_ptr(in3, exec->in_stride[3]);
234  }
235 
236  for (int i = 0; i < elems; i++)
237  iter->in[i] += sizeof(block_t);
238 
239  CONTINUE(xs, ys, zs, ws);
240 }
241 
243 {
244  SwsFilterWeights *filter = params->op->rw.kernel;
245  out->priv.ptr = av_refstruct_ref(filter->weights);
246  out->priv.i32[2] = filter->filter_size;
247  out->free = ff_op_priv_unref;
248  return 0;
249 }
250 
251 /* Fully general horizontal planar filter case */
252 DECL_READ(filter_h, const int elems)
253 {
254  const SwsOpExec *exec = iter->exec;
255  const int *restrict weights = impl->priv.ptr;
256  const int filter_size = impl->priv.i32[2];
257  const float scale = 1.0f / SWS_FILTER_SCALE;
258  const int xpos = iter->x;
259  weights += filter_size * iter->x;
260 
261  f32block_t xs, ys, zs, ws;
262  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
263  const int offset = exec->in_offset_x[xpos + i];
264  pixel_t *start0 = bump_ptr(in0, offset);
265  pixel_t *start1 = bump_ptr(in1, offset);
266  pixel_t *start2 = bump_ptr(in2, offset);
267  pixel_t *start3 = bump_ptr(in3, offset);
268 
269  inter_t sx = 0, sy = 0, sz = 0, sw = 0;
270  for (int j = 0; j < filter_size; j++) {
271  const int weight = weights[j];
272  sx += weight * start0[j];
273  if (elems > 1)
274  sy += weight * start1[j];
275  if (elems > 2)
276  sz += weight * start2[j];
277  if (elems > 3)
278  sw += weight * start3[j];
279  }
280 
281  xs[i] = (float) sx * scale;
282  if (elems > 1)
283  ys[i] = (float) sy * scale;
284  if (elems > 2)
285  zs[i] = (float) sz * scale;
286  if (elems > 3)
287  ws[i] = (float) sw * scale;
288 
289  weights += filter_size;
290  }
291 
292  CONTINUE(xs, ys, zs, ws);
293 }
294 
295 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \
296 static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, \
297  const SwsOpImpl *restrict impl, \
298  void *restrict x, void *restrict y,\
299  void *restrict z, void *restrict w)\
300 { \
301  CALL_READ(FUNC##SUFFIX, ELEMS); \
302 } \
303  \
304 DECL_ENTRY(FUNC##ELEMS##SUFFIX, \
305  .op = SWS_OP_READ, \
306  .setup = fn(setup_filter##SUFFIX), \
307  .rw.elems = ELEMS, \
308  .rw.filter = SWS_OP_FILTER_##DIR, \
309 );
310 
311 WRAP_FILTER(filter, V, 1, _v)
312 WRAP_FILTER(filter, V, 2, _v)
313 WRAP_FILTER(filter, V, 3, _v)
314 WRAP_FILTER(filter, V, 4, _v)
315 
316 WRAP_FILTER(filter, H, 1, _h)
317 WRAP_FILTER(filter, H, 2, _h)
318 WRAP_FILTER(filter, H, 3, _h)
319 WRAP_FILTER(filter, H, 4, _h)
320 
321 static void fn(process)(const SwsOpExec *exec, const void *priv,
322  const int bx_start, const int y_start,
323  int bx_end, int y_end)
324 {
325  const SwsOpChain *chain = priv;
326  const SwsOpImpl *impl = chain->impl;
327  u32block_t x, y, z, w; /* allocate enough space for any intermediate */
328 
329  SwsOpIter iterdata;
330  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
331  iter->exec = exec;
332  for (int i = 0; i < 4; i++) {
333  iter->in[i] = (uintptr_t) exec->in[i];
334  iter->out[i] = (uintptr_t) exec->out[i];
335  }
336 
337  for (iter->y = y_start; iter->y < y_end; iter->y++) {
338  for (int block = bx_start; block < bx_end; block++) {
339  iter->x = block * SWS_BLOCK_SIZE;
340  CONTINUE(x, y, z, w);
341  }
342 
343  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
344  for (int i = 0; i < 4; i++) {
345  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
346  iter->out[i] += exec->out_bump[i];
347  }
348  }
349 }
WRAP_CLEAR
#define WRAP_CLEAR(X, Y, Z, W)
Definition: ops_tmpl_common.c:81
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format; the lists are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ops_backend.h
f32block_t
float f32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:36
out
static FILE * out
Definition: movenc.c:55
SwsOpIter::exec
const SwsOpExec * exec
Definition: ops_backend.h:52
block_t
#define block_t
Definition: ops_tmpl_float.c:34
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:274
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
CONTINUE
#define CONTINUE(X, Y, Z, W)
Definition: ops_backend.h:115
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsOpIter
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.h:46
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:154
SwsOpIter::x
int x
Definition: ops_backend.h:49
WRAP_CONVERT_UINT
#define WRAP_CONVERT_UINT(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_tmpl_common.c:27
DECL_PATTERN
DECL_PATTERN(clear)
Definition: ops_tmpl_common.c:64
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
fn
Definition: ops_tmpl_float.c:123
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
float
float
Definition: af_crystalizer.c:122
W
#define W(a, i, v)
Definition: jpegls.h:119
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_LOOP
#define SWS_LOOP
Definition: ops_backend.h:58
SwsOpImpl
Definition: ops_chain.h:71
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
DECL_READ
DECL_READ(filter_v, const int elems)
Definition: ops_tmpl_common.c:197
u32block_t
uint32_t u32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:35
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.c:30
SwsOpIter::out
uintptr_t out[4]
Definition: ops_backend.h:48
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
bump_ptr
#define bump_ptr(ptr, bump)
Definition: ops_backend.h:71
xs
#define xs(width, name, var, subs,...)
Definition: cbs_vp9.c:305
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
V
#define V
Definition: avdct.c:32
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:289
SwsOpIter::in
uintptr_t in[4]
Definition: ops_backend.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
WRAP_FILTER
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)
Definition: ops_tmpl_common.c:295
process
static void fn() process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: ops_tmpl_common.c:321
pixel_t
#define pixel_t
Definition: ops_tmpl_float.c:32
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
H
#define H
Definition: pixlet.c:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
Y
#define Y
Definition: boxblur.h:37
DECL_SETUP
DECL_SETUP(setup_filter_v, params, out)
Definition: ops_tmpl_common.c:176
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
weights
static const int weights[]
Definition: hevc_pel.c:32
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
inter_t
#define inter_t
Definition: ops_tmpl_float.c:33
w
uint8_t w
Definition: llvidencdsp.c:39
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
setup_filter_v
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:312
X
@ X
Definition: vf_addroi.c:27
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
WRAP_COMMON_PATTERNS
WRAP_COMMON_PATTERNS(min,.op=SWS_OP_MIN,.setup=ff_sws_setup_clamp,.flexible=true,)
SwsOpIter::y
int y
Definition: ops_backend.h:49
min
float min
Definition: vorbis_enc_data.h:429
setup_filter_h
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:342