FFmpeg
ops_chain.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_OPS_CHAIN_H
22 #define SWSCALE_OPS_CHAIN_H
23 
24 #include "libavutil/cpu.h"
25 #include "libavutil/mem.h"
26 
27 #include "ops_internal.h"
28 
29 /**
30  * Helpers for SIMD implementations based on chained kernels, using a
31  * continuation passing style to link them together.
32  *
33  * The basic idea here is to "link" together a series of different operation
34  * kernels by constructing a list of kernel addresses into an SwsOpChain. Each
35  * kernel will load the address of the next kernel (the "continuation") from
36  * this struct, and jump directly into it; using an internal function signature
37  * that is an implementation detail of the specific backend.
38  */
39 
/* Forward declaration; the full struct definition appears later in this header. */
typedef struct SwsUOpTable SwsUOpTable;
41 
42 /**
43  * Private data for each kernel.
44  */
45 typedef union SwsOpPriv {
46  DECLARE_ALIGNED_16(char, data)[16];
47 
48  /* Common types */
49  void *ptr;
50  uint8_t u8[16];
51  int8_t i8[16];
52  uint16_t u16[8];
53  int16_t i16[8];
54  uint32_t u32[4];
56  float f32[4];
57  uint64_t u64[2];
59  uintptr_t uptr[2];
60  intptr_t iptr[2];
61 } SwsOpPriv;
62 
63 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch");
64 
65 /**
66  * Per-kernel execution context.
67  *
68  * Note: This struct is hard-coded in assembly, so do not change the layout.
69  */
70 typedef void (*SwsFuncPtr)(void);
71 typedef struct SwsOpImpl {
72  SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */
73  SwsOpPriv priv; /* [offset = 16] Private data for this operation. */
74 } SwsOpImpl;
75 
76 static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch");
77 static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch");
78 
79 /**
80  * Compiled "chain" of operations, which can be dispatched efficiently.
81  * Effectively just a list of function pointers, alongside a small amount of
82  * private data for each operation.
83  */
84 typedef struct SwsOpChain {
85 #define SWS_MAX_OPS 16
86  SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
87  void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *);
88  int num_impl;
89  int cpu_flags; /* set of all used CPU flags */
90  int over_read[4]; /* chain over-reads input by this many bytes */
91  int over_write[4]; /* chain over-writes output by this many bytes */
92 } SwsOpChain;
93 
95 void ff_sws_op_chain_free_cb(void *chain);
96 static inline void ff_sws_op_chain_free(SwsOpChain *chain)
97 {
99 }
100 
101 /* Returns 0 on success, or a negative error code. */
103  void (*free)(SwsOpPriv *), const SwsOpPriv *priv);
104 
105 typedef struct SwsImplParams {
107  union {
108  const SwsUOp *uop;
109  const SwsOp *op;
110  };
112 } SwsImplParams;
113 
114 typedef struct SwsImplResult {
115  SwsFuncPtr func; /* overrides `SwsUOpEntry.func` if non-NULL */
116  SwsOpPriv priv; /* private data for this implementation instance */
117  void (*free)(SwsOpPriv *priv); /* free function for `priv` */
118  int over_read[4]; /* implementation over-reads input by this many bytes */
119  int over_write[4]; /* implementation over-writes output by this many bytes */
120 } SwsImplResult;
121 
122 typedef struct SwsUOpEntry {
123  /* Kernel metadata; reduced size subset of SwsUOp (sans data) */
128 
129  /* Kernel implementation */
131  int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional */
132  bool (*check)(const SwsImplParams *params); /* optional, return true if supported */
133 } SwsUOpEntry;
134 
135 /* Setup helpers for common/trivial operation types */
139 
140 /* Setup helpers for SwsUOp data */
142 int ff_sws_setup_vec4(const SwsImplParams *params, SwsImplResult *out);
143 
144 static inline void ff_op_priv_free(SwsOpPriv *priv)
145 {
146  av_freep(&priv->ptr);
147 }
148 
149 static inline void ff_op_priv_unref(SwsOpPriv *priv)
150 {
151  av_refstruct_unref(&priv->ptr);
152 }
153 
154 struct SwsUOpTable {
155  unsigned cpu_flags; /* required CPU flags for this table */
156  int block_size; /* fixed block size of this table */
157  const SwsUOpEntry *entries[]; /* terminated by NULL */
158 };
159 
160 /**
161  * "Compile" a single uop by looking it up in a list of fixed size uop tables,
162  * in decreasing order of preference.
163  *
164  * Returns 0 or a negative error code.
165  */
166 int ff_sws_uop_lookup(SwsContext *ctx, const SwsUOpTable *const tables[],
167  int num_tables, const SwsUOp *uop, const int block_size,
168  SwsOpChain *chain);
169 
170 #endif
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
SwsImplResult::func
SwsFuncPtr func
Definition: ops_chain.h:115
SwsUOpEntry::mask
SwsCompMask mask
Definition: ops_chain.h:126
SwsOpChain::over_read
int over_read[4]
Definition: ops_chain.h:90
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:85
out
static FILE * out
Definition: movenc.c:55
int64_t
long long int64_t
Definition: coverity.c:34
ff_sws_setup_clear
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:97
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:72
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
SwsUOpEntry::type
SwsPixelType type
Definition: ops_chain.h:125
data
const char data[16]
Definition: mxf.c:149
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
SwsUOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:154
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:149
cpu.h
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SwsOpPriv::u32
uint32_t u32[4]
Definition: ops_chain.h:54
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *chain)
Definition: ops_chain.c:34
SwsOpPriv::DECLARE_ALIGNED_16
DECLARE_ALIGNED_16(char, data)[16]
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
SwsImplParams::table
const SwsUOpTable * table
Definition: ops_chain.h:106
SwsUOpParams
Definition: uops.h:204
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
SwsUOpTable::block_size
int block_size
Definition: ops_chain.h:156
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:65
SwsUOpEntry::uop
SwsUOpType uop
Definition: ops_chain.h:124
SwsOpImpl
Definition: ops_chain.h:71
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SwsOpPriv::i64
int64_t i64[2]
Definition: ops_chain.h:58
SwsOpPriv::f32
float f32[4]
Definition: ops_chain.h:56
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:49
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
SwsUOpTable::entries
const SwsUOpEntry * entries[]
Definition: ops_chain.h:157
SwsImplParams::op
const SwsOp * op
Definition: ops_chain.h:109
SwsUOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:130
SwsUOpEntry::setup
int(* setup)(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.h:131
SwsPixelType
SwsPixelType
Definition: uops.h:38
SwsImplParams
Definition: ops_chain.h:105
SwsUOp
Definition: uops.h:215
ff_sws_setup_vec4
int ff_sws_setup_vec4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:200
SwsOpPriv::u8
uint8_t u8[16]
Definition: ops_chain.h:50
SwsOpPriv::uptr
uintptr_t uptr[2]
Definition: ops_chain.h:59
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:88
SwsOpPriv::i16
int16_t i16[8]
Definition: ops_chain.h:53
SwsOpPriv::u16
uint16_t u16[8]
Definition: ops_chain.h:52
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
SwsImplParams::ctx
SwsContext * ctx
Definition: ops_chain.h:111
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
SwsOpPriv::iptr
intptr_t iptr[2]
Definition: ops_chain.h:60
SwsUOpType
SwsUOpType
Definition: uops.h:101
SwsOpChain::over_write
int over_write[4]
Definition: ops_chain.h:91
ops_internal.h
SwsImplResult::free
void(* free)(SwsOpPriv *priv)
Definition: ops_chain.h:117
SwsOp
Definition: ops.h:230
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:144
SwsUOpEntry::check
bool(* check)(const SwsImplParams *params)
Definition: ops_chain.h:132
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:73
ff_sws_uop_lookup
int ff_sws_uop_lookup(SwsContext *ctx, const SwsUOpTable *const tables[], int num_tables, const SwsUOp *uop, const int block_size, SwsOpChain *chain)
"Compile" a single uop by looking it up in a list of fixed size uop tables, in decreasing order of pr...
Definition: ops_chain.c:116
SwsImplResult::over_read
int over_read[4]
Definition: ops_chain.h:118
ff_sws_setup_scalar
int ff_sws_setup_scalar(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:185
SwsOpPriv::u64
uint64_t u64[2]
Definition: ops_chain.h:57
SwsImplResult::priv
SwsOpPriv priv
Definition: ops_chain.h:116
SwsImplResult::over_write
int over_write[4]
Definition: ops_chain.h:119
mem.h
SwsImplParams::uop
const SwsUOp * uop
Definition: ops_chain.h:108
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
int32_t
int32_t
Definition: audioconvert.c:56
SwsUOpEntry
Definition: ops_chain.h:122
SwsUOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:155
SwsUOpEntry::par
SwsUOpParams par
Definition: ops_chain.h:127
SwsOpPriv::i32
int32_t i32[4]
Definition: ops_chain.h:55
SwsOpPriv::i8
int8_t i8[16]
Definition: ops_chain.h:51
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:80
SwsContext
Main external API structure.
Definition: swscale.h:229
SwsOpPriv
Private data for each kernel.
Definition: ops_chain.h:45
SwsImplResult
Definition: ops_chain.h:114