FFmpeg
uops_backend.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 
23 #include "uops_tmpl.h"
24 
25 /**
26  * We want to disable FP contraction because this is a reference backend that
27  * establishes a bit-exact reference result.
28  */
29 #ifdef __clang__
30 #pragma STDC FP_CONTRACT OFF
31 #elif AV_GCC_VERSION_AT_LEAST(4, 8)
32 #pragma GCC optimize ("fp-contract=off")
33 #elif defined(_MSC_VER)
34 #pragma fp_contract (off)
35 #endif
36 
37 #if AV_GCC_VERSION_AT_LEAST(4, 4)
38 #pragma GCC optimize ("finite-math-only")
39 #endif
40 
/*
 * Template instantiation: uops_tmpl.c is included once per combination of
 * IS_FLOAT and BIT_DEPTH defined below. Each pair is defined immediately
 * before the include and undefined again afterwards, so the settings of one
 * instantiation do not carry over into the next.
 */
41 /* Integer types */
42 #define IS_FLOAT 0
43 # define BIT_DEPTH 8
44 # include "uops_tmpl.c"
45 # undef BIT_DEPTH
46 # define BIT_DEPTH 16
47 # include "uops_tmpl.c"
48 # undef BIT_DEPTH
49 # define BIT_DEPTH 32
50 # include "uops_tmpl.c"
51 # undef BIT_DEPTH
52 #undef IS_FLOAT
53 
54 /* Floating point types */
55 #define IS_FLOAT 1
56 # define BIT_DEPTH 32
57 # include "uops_tmpl.c"
58 # undef BIT_DEPTH
59 #undef IS_FLOAT
60 
/*
 * REF_ALL_UOPS(TYPE) expands to one SWS_FOR(TYPE, <uop>, REF_ENTRY)
 * invocation per micro-op kind listed below. SWS_FOR and REF_ENTRY are not
 * defined in this file. Every list line ends in a line continuation; the
 * trailing comment line closes the macro, so a new uop is added by inserting
 * another continued SWS_FOR line above it.
 */
61 /* Expanded as new uop types are implemented in the C/template backend */
62 #define REF_ALL_UOPS(TYPE) \
63  SWS_FOR(TYPE, READ_PLANAR, REF_ENTRY) \
64  SWS_FOR(TYPE, READ_PLANAR_FV, REF_ENTRY) \
65  SWS_FOR(TYPE, READ_PLANAR_FH, REF_ENTRY) \
66  SWS_FOR(TYPE, READ_PACKED, REF_ENTRY) \
67  SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY) \
68  SWS_FOR(TYPE, READ_BIT, REF_ENTRY) \
69  SWS_FOR(TYPE, READ_PALETTE, REF_ENTRY) \
70  SWS_FOR(TYPE, PERMUTE, REF_ENTRY) \
71  SWS_FOR(TYPE, COPY, REF_ENTRY) \
72  SWS_FOR(TYPE, WRITE_PLANAR, REF_ENTRY) \
73  SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY) \
74  SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY) \
75  SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY) \
76  SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY) \
77  SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY) \
78  SWS_FOR(TYPE, EXPAND_PAIR, REF_ENTRY) \
79  SWS_FOR(TYPE, EXPAND_QUAD, REF_ENTRY) \
80  SWS_FOR(TYPE, TO_U8, REF_ENTRY) \
81  SWS_FOR(TYPE, TO_U16, REF_ENTRY) \
82  SWS_FOR(TYPE, TO_U32, REF_ENTRY) \
83  SWS_FOR(TYPE, TO_F32, REF_ENTRY) \
84  SWS_FOR(TYPE, SCALE, REF_ENTRY) \
85  SWS_FOR(TYPE, ADD, REF_ENTRY) \
86  SWS_FOR(TYPE, MIN, REF_ENTRY) \
87  SWS_FOR(TYPE, MAX, REF_ENTRY) \
88  SWS_FOR(TYPE, UNPACK, REF_ENTRY) \
89  SWS_FOR(TYPE, PACK, REF_ENTRY) \
90  SWS_FOR(TYPE, LSHIFT, REF_ENTRY) \
91  SWS_FOR(TYPE, RSHIFT, REF_ENTRY) \
92  SWS_FOR(TYPE, CLEAR, REF_ENTRY) \
93  SWS_FOR(TYPE, LINEAR, REF_ENTRY) \
94  SWS_FOR(TYPE, DITHER, REF_ENTRY) \
95  /* end of macro */
96 
/*
 * Micro-op implementation table of this backend. compile() passes it, as
 * the only table, to ff_sws_uop_lookup(). The entries list ends with NULL.
 */
97 static const SwsUOpTable uop_table = {
99  .entries = {
104  NULL
105  },
106 };
107 
/**
 * Execute the compiled chain over a rectangle of blocks.
 *
 * Rows y_start .. y_end - 1 are visited in order; within each row, blocks
 * bx_start .. bx_end - 1 are visited in order and CONTINUE() is invoked once
 * per block with iter->x set to the block index times SWS_BLOCK_SIZE. After
 * each row, the per-plane input and output addresses kept in the iterator
 * are advanced by the bumps supplied in exec.
 *
 * @param exec     execution parameters (plane pointers, bumps, strides)
 * @param priv     the SwsOpChain that compile() stored as .priv
 * @param bx_start first block index, inclusive
 * @param y_start  first row, inclusive
 * @param bx_end   last block index, exclusive
 * @param y_end    last row, exclusive
 */
108 static void process(const SwsOpExec *exec, const void *priv,
109  const int bx_start, const int y_start,
110  int bx_end, int y_end)
111 {
112  const SwsOpChain *chain = priv;
 /* NOTE(review): impl is not referenced by name below; presumably it is
  * picked up by the CONTINUE() macro, like iter -- confirm in uops_tmpl.h */
113  const SwsOpImpl *impl = chain->impl;
114  block_t x, y, z, w; /* allocate enough space for any intermediate */
115 
116  SwsOpIter iterdata;
117  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
118  iter->exec = exec;
 /* Keep the four plane addresses as integers so that they can be advanced
  * with plain arithmetic at the end of every row */
119  for (int i = 0; i < 4; i++) {
120  iter->in[i] = (uintptr_t) exec->in[i];
121  iter->out[i] = (uintptr_t) exec->out[i];
122  }
123 
124  for (iter->y = y_start; iter->y < y_end; iter->y++) {
125  for (int block = bx_start; block < bx_end; block++) {
126  iter->x = block * SWS_BLOCK_SIZE;
127  CONTINUE(&x, &y, &z, &w);
128  }
129 
 /* End of row: in_bump_y is optional (may be NULL) and is indexed by the
  * row that was just processed; its value is multiplied by each plane's
  * input stride and added on top of the regular input bump. Output planes
  * only receive their regular bump. */
130  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
131  for (int i = 0; i < 4; i++) {
132  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
133  iter->out[i] += exec->out_bump[i];
134  }
135  }
136 }
137 
/**
 * Compile an operation list for this backend.
 *
 * The list is translated to micro-ops with ff_sws_ops_translate(); each
 * micro-op is then resolved against uop_table with ff_sws_uop_lookup(),
 * which receives the chain. On success *out is filled in with process() as
 * the entry point and the chain as its private data.
 *
 * @param ctx context handed to the translate/lookup helpers and to av_log()
 * @param ops operation list to compile
 * @param out receives the compiled operation; written only on success
 * @return 0 on success, a negative error code otherwise
 */
138 static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
139 {
140  int ret;
141 
143  if (!chain)
144  return AVERROR(ENOMEM);
145 
147  if (!uops) {
148  ret = AVERROR(ENOMEM);
149  goto fail;
150  }
151 
152  ret = ff_sws_ops_translate(ctx, ops, 0, uops);
153  if (ret < 0)
154  goto fail;
155 
 /* An empty micro-op list after a successful translation is treated as a
  * programming error, not as a runtime failure */
156  av_assert0(uops->num_ops > 0);
157  for (int i = 0; i < uops->num_ops; i++) {
 /* The lookup takes an array of tables; this backend has exactly one */
158  const SwsUOpTable *table = &uop_table;
159  ret = ff_sws_uop_lookup(ctx, &table, 1, &uops->ops[i],
160  SWS_BLOCK_SIZE, chain);
161  if (ret < 0)
162  goto fail;
163  }
164 
 /* From here on nothing can fail: the chain is handed over through .priv
  * and is therefore not freed on the success path below */
165  *out = (SwsCompiledOp) {
166  .slice_align = 1,
167  .block_size = SWS_BLOCK_SIZE,
168  .cpu_flags = chain->cpu_flags,
169  .priv = chain,
171  .func = process,
172  };
173 
174  memcpy(out->over_read, chain->over_read, sizeof(out->over_read));
175  memcpy(out->over_write, chain->over_write, sizeof(out->over_write));
176 
 /* Debug aid: log the name of every micro-op that was compiled */
177  av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n");
178  for (int i = 0; i < uops->num_ops; i++) {
179  char name[SWS_UOP_NAME_MAX];
180  ff_sws_uop_name(&uops->ops[i], name);
181  av_log(ctx, AV_LOG_DEBUG, " %s\n", name);
182  }
183 
 /* The micro-op list is only needed during compilation */
184  ff_sws_uop_list_free(&uops);
185  return 0;
186 
 /* Error path: release both the micro-op list and the chain */
187 fail:
188  ff_sws_uop_list_free(&uops);
189  ff_sws_op_chain_free(chain);
190  return ret;
191 }
192 
194  .name = "c",
195  .flags = SWS_BACKEND_C,
196  .compile = compile,
197  .hw_format = AV_PIX_FMT_NONE,
198 };
name
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it be. vf, default, minimum, maximum, flags: name is the option name.
Definition: writing_filters.txt:88
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video, that means pixel format. For audio, that means channel layout and sample format. The lists are not just lists; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references, ownership and permissions.
SwsOpChain::over_read
int over_read[4]
Definition: ops_chain.h:90
out
static FILE * out
Definition: movenc.c:55
REF_ALL_UOPS
#define REF_ALL_UOPS(TYPE)
Definition: uops_backend.c:62
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
SwsOpIter::exec
const SwsOpExec * exec
Definition: uops_tmpl.h:62
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
table
static const uint16_t table[]
Definition: prosumer.c:203
SwsOpIter
Internal context holding per-iter execution data.
Definition: uops_tmpl.h:56
SwsUOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:154
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:252
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SwsOpIter::x
int x
Definition: uops_tmpl.h:59
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
avassert.h
F32
@ F32
Definition: sw_ops.c:44
SwsUOpTable::block_size
int block_size
Definition: ops_chain.h:156
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
backend_c
const SwsOpBackend backend_c
Copyright (C) 2025 Niklas Haas.
Definition: uops_backend.c:193
SwsOpExec::in_bump_y
int32_t * in_bump_y
Line bump; determines how many additional lines to advance (after incrementing normally to the next l...
Definition: ops_dispatch.h:72
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SwsOpImpl
Definition: ops_chain.h:71
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SwsOpBackend
Definition: ops_dispatch.h:133
SwsOpIter::out
uintptr_t out[4]
Definition: uops_tmpl.h:58
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
fail
#define fail
Definition: test.h:478
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
NULL
#define NULL
Definition: coverity.c:32
compile
static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: uops_backend.c:138
block_t
Definition: uops_tmpl.h:43
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2026 Niklas Haas.
Definition: uops_tmpl.h:40
process
static void process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: uops_backend.c:108
SwsOpIter::in
uintptr_t in[4]
Definition: uops_tmpl.h:57
U8
@ U8
Definition: sw_ops.c:41
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
CONTINUE
#define CONTINUE(...)
Definition: uops_tmpl.h:107
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
SwsOpChain::over_write
int over_write[4]
Definition: ops_chain.h:91
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:863
ret
ret
Definition: filter_design.txt:187
SwsUOpList::num_ops
int num_ops
Definition: uops.h:257
SwsCompiledOp
Definition: ops_dispatch.h:100
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **p_ops)
Definition: uops.c:368
SWS_BACKEND_C
@ SWS_BACKEND_C
Template-based C reference implementation.
Definition: swscale.h:116
uops_tmpl.c
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:130
SwsUOpList
Definition: uops.h:255
uop_table
static const SwsUOpTable uop_table
Definition: uops_backend.c:97
U16
@ U16
Definition: sw_ops.c:42
w
uint8_t w
Definition: llvidencdsp.c:39
uops_tmpl.h
ff_sws_uop_lookup
int ff_sws_uop_lookup(SwsContext *ctx, const SwsUOpTable *const tables[], int num_tables, const SwsUOp *uop, const int block_size, SwsOpChain *chain)
"Compile" a single uop by looking it up in a list of fixed size uop tables, in decreasing order of pr...
Definition: ops_chain.c:116
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:264
SwsContext
Main external API structure.
Definition: swscale.h:229
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:256
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
SwsOpIter::y
int y
Definition: uops_tmpl.h:59
U32
@ U32
Definition: sw_ops.c:43