FFmpeg
ops_chain.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_OPS_CHAIN_H
22 #define SWSCALE_OPS_CHAIN_H
23 
24 #include "libavutil/cpu.h"
25 #include "libavutil/mem.h"
26 
27 #include "ops_internal.h"
28 
29 /**
30  * Helpers for SIMD implementations based on chained kernels, using a
31  * continuation passing style to link them together.
32  *
33  * The basic idea here is to "link" together a series of different operation
34  * kernels by constructing a list of kernel addresses into an SwsOpChain. Each
35  * kernel will load the address of the next kernel (the "continuation") from
36  * this struct, and jump directly into it; using an internal function signature
37  * that is an implementation detail of the specific backend.
38  */
39 
40 /**
41  * Private data for each kernel.
42  */
43 typedef union SwsOpPriv {
44  DECLARE_ALIGNED_16(char, data)[16];
45 
46  /* Common types */
47  void *ptr;
48  uint8_t u8[16];
49  int8_t i8[16];
50  uint16_t u16[8];
51  int16_t i16[8];
52  uint32_t u32[4];
54  float f32[4];
55  uint64_t u64[2];
57  uintptr_t uptr[2];
58  intptr_t iptr[2];
59 } SwsOpPriv;
60 
61 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch");
62 
63 /* Setup helpers */
64 int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out);
65 int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out);
66 int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out);
67 int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out);
68 
69 /**
70  * Per-kernel execution context.
71  *
72  * Note: This struct is hard-coded in assembly, so do not change the layout.
73  */
74 typedef void (*SwsFuncPtr)(void);
75 typedef struct SwsOpImpl {
76  SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */
77  SwsOpPriv priv; /* [offset = 16] Private data for this operation. */
78 } SwsOpImpl;
79 
80 static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch");
81 static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch");
82 
83 /**
84  * Compiled "chain" of operations, which can be dispatched efficiently.
85  * Effectively just a list of function pointers, alongside a small amount of
86  * private data for each operation.
87  */
88 typedef struct SwsOpChain {
89 #define SWS_MAX_OPS 16
90  SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
91  void (*free[SWS_MAX_OPS + 1])(SwsOpPriv);
92  int num_impl;
93  int cpu_flags; /* set of all used CPU flags */
94 } SwsOpChain;
95 
97 void ff_sws_op_chain_free_cb(void *chain);
98 static inline void ff_sws_op_chain_free(SwsOpChain *chain)
99 {
101 }
102 
103 /* Returns 0 on success, or a negative error code. */
105  void (*free)(SwsOpPriv), const SwsOpPriv *priv);
106 
107 typedef struct SwsOpEntry {
108  /* Kernel metadata; reduced size subset of SwsOp */
111  bool flexible; /* if true, only the type and op are matched */
112  bool unused[4]; /* for kernels which operate on a subset of components */
113 
114  union { /* extra data defining the operation, unless `flexible` is true */
119  uint32_t linear_mask; /* subset of SwsLinearOp */
120  int dither_size; /* subset of SwsDitherOp */
121  int clear_value; /* clear value for integer clears */
122  AVRational scale; /* scale factor for SWS_OP_SCALE */
123  };
124 
125  /* Kernel implementation */
127  int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */
128  void (*free)(SwsOpPriv priv);
129 } SwsOpEntry;
130 
131 static inline void ff_op_priv_free(SwsOpPriv priv)
132 {
133  av_free(priv.ptr);
134 }
135 
136 typedef struct SwsOpTable {
137  unsigned cpu_flags; /* required CPU flags for this table */
138  int block_size; /* fixed block size of this table */
139  const SwsOpEntry *entries[]; /* terminated by NULL */
140 } SwsOpTable;
141 
142 /**
143  * "Compile" a single op by looking it up in a list of fixed size op tables.
144  * See `op_match` in `ops_chain.c` for details on how the matching works.
145  *
146  * Returns 0, AVERROR(EAGAIN), or a negative error code.
147  */
148 int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables,
149  SwsOpList *ops, const int block_size,
150  SwsOpChain *chain);
151 
152 #endif
SwsOpTable
Definition: ops_chain.h:136
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:89
out
static FILE * out
Definition: movenc.c:55
int64_t
long long int64_t
Definition: coverity.c:34
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:76
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
data
const char data[16]
Definition: mxf.c:149
SwsOpEntry::type
SwsPixelType type
Definition: ops_chain.h:110
SwsOpEntry::op
SwsOpType op
Definition: ops_chain.h:109
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:93
SwsOpEntry::setup
int(* setup)(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.h:127
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
SwsOpTable::block_size
int block_size
Definition: ops_chain.h:138
SwsOpPriv::u32
uint32_t u32[4]
Definition: ops_chain.h:52
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:74
SwsOpEntry::scale
AVRational scale
Definition: ops_chain.h:122
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:249
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *chain)
Definition: ops_chain.c:34
SwsOpPriv::DECLARE_ALIGNED_16
DECLARE_ALIGNED_16(char, data)[16]
tables
Writing a table generator. This documentation is preliminary. Parts of the API are not good and should be changed. Basic concepts: a table generator consists of two files, *_tablegen.c and *_tablegen.h. The .h file will provide the variable declarations and initialization code for the tables.
Definition: tablegen.txt:10
SwsReadWriteOp
Definition: ops.h:100
SwsSwizzleOp
Definition: ops.h:122
ff_sws_setup_q
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:266
SwsOpEntry::swizzle
SwsSwizzleOp swizzle
Definition: ops_chain.h:117
SwsOpEntry::convert
SwsConvertOp convert
Definition: ops_chain.h:118
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SwsOpImpl
Definition: ops_chain.h:75
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:90
SwsOpTable::entries
const SwsOpEntry * entries[]
Definition: ops_chain.h:139
SwsOpPriv::i64
int64_t i64[2]
Definition: ops_chain.h:56
SwsOpPriv::f32
float f32[4]
Definition: ops_chain.h:54
SwsOpEntry::dither_size
int dither_size
Definition: ops_chain.h:120
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:47
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv priv)
Definition: ops_chain.h:131
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:88
SwsOpEntry::free
void(* free)(SwsOpPriv priv)
Definition: ops_chain.h:128
SwsOpEntry::flexible
bool flexible
Definition: ops_chain.h:111
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
SwsOpEntry::clear_value
int clear_value
Definition: ops_chain.h:121
SwsOpType
SwsOpType
Definition: ops.h:43
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:126
cpu.h
SwsOpPriv::u8
uint8_t u8[16]
Definition: ops_chain.h:48
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:196
SwsOpPriv::uptr
uintptr_t uptr[2]
Definition: ops_chain.h:57
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:92
SwsOpPriv::i16
int16_t i16[8]
Definition: ops_chain.h:51
SwsOpEntry
Definition: ops_chain.h:107
SwsOpPriv::u16
uint16_t u16[8]
Definition: ops_chain.h:50
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv)
Definition: ops_chain.h:91
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:98
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:137
SwsPackOp
Definition: ops.h:114
SwsOpPriv::iptr
intptr_t iptr[2]
Definition: ops_chain.h:58
ops_internal.h
ff_sws_setup_u
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:255
SwsOp
Definition: ops.h:188
SwsOpEntry::rw
SwsReadWriteOp rw
Definition: ops_chain.h:115
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv), const SwsOpPriv *priv)
Definition: ops_chain.c:48
SwsOpEntry::unused
bool unused[4]
Definition: ops_chain.h:112
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:77
SwsOpPriv::u64
uint64_t u64[2]
Definition: ops_chain.h:55
mem.h
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
int32_t
int32_t
Definition: audioconvert.c:56
SwsConvertOp
Definition: ops.h:136
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:279
SwsOpPriv::i32
int32_t i32[4]
Definition: ops_chain.h:53
SwsOpPriv::i8
int8_t i8[16]
Definition: ops_chain.h:49
SwsOpEntry::pack
SwsPackOp pack
Definition: ops_chain.h:116
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:224
SwsOpPriv
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:43
SwsOpEntry::linear_mask
uint32_t linear_mask
Definition: ops_chain.h:119