FFmpeg
Macros | Functions | Variables
ops.c File Reference
#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/x86/cpu.h"
#include "../ops_chain.h"
#include "../uops.h"
#include "../uops_macros.h"

Go to the source code of this file.

Macros

#define REF_ENTRY(EXT, NAME, ...)   &op_##NAME##EXT,
 
#define DECL_ENTRY(EXT, CHECK, SETUP, NAME, ...)
 
#define DECL_OPS_COMMON(EXT, TYPE)
 
#define REF_OPS_COMMON(EXT, TYPE)
 
#define DECL_TABLE_U8(EXT, SIZE, FLAG)
 
#define DECL_TABLE_U16(EXT, SIZE, FLAG)
 
#define DECL_TABLE_U32(EXT, SIZE, FLAG)
 
#define DECL_TABLE_F32(EXT, SIZE, FLAG)
 
#define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT)
 

Functions

static int setup_rw_packed (const SwsImplParams *params, SwsImplResult *out)
 Copyright (C) 2025-2026 Niklas Haas. More...
 
static int setup_filter_v (const SwsImplParams *params, SwsImplResult *out)
 
static int hscale_sizeof_weight (const SwsUOp *uop)
 
static int setup_filter_h (const SwsImplParams *params, SwsImplResult *out)
 
static bool check_filter_h_4x4 (const SwsImplParams *params)
 
static int setup_filter_h_4x4 (const SwsImplParams *params, SwsImplResult *out)
 
static int setup_scale (const SwsImplParams *params, SwsImplResult *out)
 
static int setup_clear (const SwsImplParams *params, SwsImplResult *out)
 
static int setup_dither (const SwsImplParams *params, SwsImplResult *out)
 
static int setup_linear (const SwsImplParams *params, SwsImplResult *out)
 
static bool uop_is_type_invariant (const SwsUOpType uop)
 
 SWS_DECL_FUNC (ff_sws_process1_x86)
 
 SWS_DECL_FUNC (ff_sws_process2_x86)
 
 SWS_DECL_FUNC (ff_sws_process3_x86)
 
 SWS_DECL_FUNC (ff_sws_process4_x86)
 
static int movsize (const int bytes, const int mmsize)
 
static int solve_shuffle (const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
 
static uint32_t expand32 (const SwsPixelType type, const SwsPixel value)
 
static void normalize_clear (SwsUOp *uop)
 
static int compile (SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
 

Variables

static const SwsOpTable *const tables []
 
const SwsOpBackend backend_x86
 

Macro Definition Documentation

◆ REF_ENTRY

#define REF_ENTRY (   EXT,
  NAME,
  ... 
)    &op_##NAME##EXT,

Definition at line 300 of file ops.c.

◆ DECL_ENTRY

#define DECL_ENTRY (   EXT,
  CHECK,
  SETUP,
  NAME,
  ... 
)
Value:
void ff_##NAME##EXT(void); \
static const SwsOpEntry op_##NAME##EXT = { \
.func = (SwsFuncPtr) ff_##NAME##EXT, \
.check = CHECK, \
.setup = SETUP, \
__VA_ARGS__, \
};

Definition at line 301 of file ops.c.

◆ DECL_OPS_COMMON

#define DECL_OPS_COMMON (   EXT,
  TYPE 
)
Value:
SWS_FOR_STRUCT(TYPE, READ_PACKED, DECL_ENTRY, EXT, NULL, setup_rw_packed) \
SWS_FOR_STRUCT(TYPE, READ_NIBBLE, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, READ_BIT, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, WRITE_PACKED, DECL_ENTRY, EXT, NULL, setup_rw_packed) \
SWS_FOR_STRUCT(TYPE, WRITE_NIBBLE, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, WRITE_BIT, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, SWAP_BYTES, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, EXPAND_BIT, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, MOVE, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, SCALE, DECL_ENTRY, EXT, NULL, setup_scale) \
SWS_FOR_STRUCT(TYPE, ADD, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
SWS_FOR_STRUCT(TYPE, MIN, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
SWS_FOR_STRUCT(TYPE, MAX, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
SWS_FOR_STRUCT(TYPE, UNPACK, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, PACK, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, LSHIFT, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, RSHIFT, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(TYPE, LINEAR_FMA, DECL_ENTRY, EXT, NULL, setup_linear) \
SWS_FOR_STRUCT(TYPE, DITHER, DECL_ENTRY, EXT, NULL, setup_dither) \
/* end of macro */

Definition at line 311 of file ops.c.

◆ REF_OPS_COMMON

#define REF_OPS_COMMON (   EXT,
  TYPE 
)
Value:
SWS_FOR(TYPE, READ_PACKED, REF_ENTRY, EXT) \
SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY, EXT) \
SWS_FOR(TYPE, READ_BIT, REF_ENTRY, EXT) \
SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY, EXT) \
SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY, EXT) \
SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY, EXT) \
SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY, EXT) \
SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY, EXT) \
SWS_FOR(TYPE, MOVE, REF_ENTRY, EXT) \
SWS_FOR(TYPE, SCALE, REF_ENTRY, EXT) \
SWS_FOR(TYPE, ADD, REF_ENTRY, EXT) \
SWS_FOR(TYPE, MIN, REF_ENTRY, EXT) \
SWS_FOR(TYPE, MAX, REF_ENTRY, EXT) \
SWS_FOR(TYPE, UNPACK, REF_ENTRY, EXT) \
SWS_FOR(TYPE, PACK, REF_ENTRY, EXT) \
SWS_FOR(TYPE, LSHIFT, REF_ENTRY, EXT) \
SWS_FOR(TYPE, RSHIFT, REF_ENTRY, EXT) \
SWS_FOR(TYPE, LINEAR_FMA, REF_ENTRY, EXT) \
SWS_FOR(TYPE, DITHER, REF_ENTRY, EXT) \
/* end of macro */

Definition at line 333 of file ops.c.

◆ DECL_TABLE_U8

#define DECL_TABLE_U8 (   EXT,
  SIZE,
  FLAG 
)
Value:
SWS_FOR_STRUCT(U8, READ_PLANAR, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U8, WRITE_PLANAR, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U8, CLEAR, DECL_ENTRY, EXT, NULL, setup_clear) \
\
static const SwsOpTable ops_u8##EXT = { \
.cpu_flags = AV_CPU_FLAG_##FLAG, \
.block_size = SIZE, \
.entries = { \
REF_OPS_COMMON(EXT, U8) \
SWS_FOR(U8, READ_PLANAR, REF_ENTRY, EXT) \
SWS_FOR(U8, WRITE_PLANAR, REF_ENTRY, EXT) \
SWS_FOR(U8, CLEAR, REF_ENTRY, EXT) \
NULL \
}, \
};

Definition at line 355 of file ops.c.

◆ DECL_TABLE_U16

#define DECL_TABLE_U16 (   EXT,
  SIZE,
  FLAG 
)
Value:
SWS_FOR_STRUCT(U8, TO_U16, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U16, TO_U8, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U8, EXPAND_PAIR, DECL_ENTRY, EXT, NULL, NULL) \
\
static const SwsOpTable ops_u16##EXT = { \
.cpu_flags = AV_CPU_FLAG_##FLAG, \
.block_size = SIZE, \
.entries = { \
REF_OPS_COMMON(EXT, U16) \
SWS_FOR(U8, TO_U16, REF_ENTRY, EXT) \
SWS_FOR(U16, TO_U8, REF_ENTRY, EXT) \
SWS_FOR(U8, EXPAND_PAIR, REF_ENTRY, EXT) \
NULL \
}, \
};

Definition at line 373 of file ops.c.

◆ DECL_TABLE_U32

#define DECL_TABLE_U32 (   EXT,
  SIZE,
  FLAG 
)
Value:
SWS_FOR_STRUCT(U8, TO_U32, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U32, TO_U8, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U16, TO_U32, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U32, TO_U16, DECL_ENTRY, EXT, NULL, NULL) \
SWS_FOR_STRUCT(U8, EXPAND_QUAD, DECL_ENTRY, EXT, NULL, NULL) \
\
static const SwsOpTable ops_u32##EXT = { \
.cpu_flags = AV_CPU_FLAG_##FLAG, \
.block_size = SIZE, \
.entries = { \
REF_OPS_COMMON(EXT, U32) \
SWS_FOR(U8, TO_U32, REF_ENTRY, EXT) \
SWS_FOR(U32, TO_U8, REF_ENTRY, EXT) \
SWS_FOR(U16, TO_U32, REF_ENTRY, EXT) \
SWS_FOR(U32, TO_U16, REF_ENTRY, EXT) \
SWS_FOR(U8, EXPAND_QUAD, REF_ENTRY, EXT) \
NULL \
}, \
};

Definition at line 391 of file ops.c.

◆ DECL_TABLE_F32

#define DECL_TABLE_F32 (   EXT,
  SIZE,
  FLAG 
)

Definition at line 413 of file ops.c.

◆ ASSIGN_SHUFFLE_FUNC

#define ASSIGN_SHUFFLE_FUNC (   IN,
  OUT,
  EXT 
)
Value:
do { \
SWS_DECL_FUNC(ff_packed_shuffle##IN##_##OUT##_##EXT); \
if (in_total == IN && out_total == OUT) \
out->func = ff_packed_shuffle##IN##_##OUT##_##EXT; \
} while (0)

Function Documentation

◆ setup_rw_packed()

static int setup_rw_packed ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Copyright (C) 2025-2026 Niklas Haas.

This file is part of FFmpeg.

FFmpeg is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.

FFmpeg is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with FFmpeg; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

Definition at line 31 of file ops.c.

◆ setup_filter_v()

static int setup_filter_v ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Definition at line 46 of file ops.c.

◆ hscale_sizeof_weight()

static int hscale_sizeof_weight ( const SwsUOp *uop)
static

Definition at line 66 of file ops.c.

Referenced by setup_filter_h(), and setup_filter_h_4x4().

◆ setup_filter_h()

static int setup_filter_h ( const SwsImplParams *params,
SwsImplResult *out 
)
static

vpgatherdd gathers 32 bits at a time, so if we're filtering a smaller size, we need to gather 2/4 taps simultaneously and unroll the inner loop over several packed samples.

Transpose filter weights to group (aligned) taps by block

Definition at line 76 of file ops.c.

◆ check_filter_h_4x4()

static bool check_filter_h_4x4 ( const SwsImplParams *params)
static

Otherwise, prefer it above a certain filter size. Empirically, this kernel seems to be faster whenever the reference/gather kernel crosses a breakpoint for the number of gathers needed, but this filter doesn't.

Tested on a Lunar Lake (Intel Core Ultra 7 258V) system.

Definition at line 164 of file ops.c.

◆ setup_filter_h_4x4()

static int setup_filter_h_4x4 ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Desired memory layout: [w][taps][pixels_align][taps_align]

Example with taps_align=8, pixels_align=4:
  [a0, a1, ... a7] weights for pixel 0, taps 0..7
  [b0, b1, ... b7] weights for pixel 1, taps 0..7
  [c0, c1, ... c7] weights for pixel 2, taps 0..7
  [d0, d1, ... d7] weights for pixel 3, taps 0..7
  [a8, a9, ... a15] weights for pixel 0, taps 8..15
  ...
repeat for all taps, then move on to pixels 4..7, etc.

Definition at line 188 of file ops.c.

◆ setup_scale()

static int setup_scale ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Definition at line 251 of file ops.c.

◆ setup_clear()

static int setup_clear ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Definition at line 265 of file ops.c.

◆ setup_dither()

static int setup_dither ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Definition at line 273 of file ops.c.

◆ setup_linear()

static int setup_linear ( const SwsImplParams *params,
SwsImplResult *out 
)
static

Definition at line 280 of file ops.c.

◆ uop_is_type_invariant()

static bool uop_is_type_invariant ( const SwsUOpType  uop)
static

Definition at line 288 of file ops.c.

Referenced by compile().

◆ SWS_DECL_FUNC() [1/4]

SWS_DECL_FUNC ( ff_sws_process1_x86  )

◆ SWS_DECL_FUNC() [2/4]

SWS_DECL_FUNC ( ff_sws_process2_x86  )

◆ SWS_DECL_FUNC() [3/4]

SWS_DECL_FUNC ( ff_sws_process3_x86  )

◆ SWS_DECL_FUNC() [4/4]

SWS_DECL_FUNC ( ff_sws_process4_x86  )

◆ movsize()

static int movsize ( const int  bytes,
const int  mmsize 
)
static

Definition at line 485 of file ops.c.

Referenced by solve_shuffle().

◆ solve_shuffle()

static int solve_shuffle ( const SwsOpList *ops,
int  mmsize,
SwsCompiledOp *out 
)
static

Definition at line 492 of file ops.c.

Referenced by compile().

◆ expand32()

static uint32_t expand32 ( const SwsPixelType  type,
const SwsPixel  value 
)
static

Definition at line 557 of file ops.c.

Referenced by normalize_clear().

◆ normalize_clear()

static void normalize_clear ( SwsUOp *uop)
static

Definition at line 568 of file ops.c.

Referenced by compile().

◆ compile()

static int compile ( SwsContext *ctx,
const SwsOpList *ops,
SwsCompiledOp *out 
)
static

Definition at line 574 of file ops.c.

Variable Documentation

◆ tables

const SwsOpTable* const tables[]
static
Initial value:
= {
&ops_u8_m1_sse4,
&ops_u8_m1_avx2,
&ops_u8_m2_sse4,
&ops_u8_m2_avx2,
&ops_u16_m1_avx2,
&ops_u16_m2_avx2,
&ops_u32_m2_avx2,
&ops_f32_m2_avx2,
}

Definition at line 469 of file ops.c.

Referenced by compile().

◆ backend_x86

const SwsOpBackend backend_x86
Initial value:
= {
.name = "x86",
.flags = SWS_BACKEND_X86,
.compile = compile,
.hw_format = AV_PIX_FMT_NONE,
}

Definition at line 663 of file ops.c.

SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:154
U8
@ U8
Definition: sw_ops.c:41
MAX
#define MAX
Definition: blend_modes.c:46
setup_linear
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:280
U32
@ U32
Definition: sw_ops.c:43
SWS_BACKEND_X86
@ SWS_BACKEND_X86
Chained x86 SIMD kernels.
Definition: swscale.h:118
setup_dither
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:273
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
REF_ENTRY
#define REF_ENTRY(EXT, NAME,...)
Definition: ops.c:300
CHECK
CHECK(-1) CHECK(-2) }} }} CHECK(1) CHECK(2) }} }} } if(diff0+diff1 > 0) temp -
SWS_FOR
#define SWS_FOR(TYPE, UOP, MACRO,...)
Definition: uops_macros.h:17
SWS_FOR_STRUCT
#define SWS_FOR_STRUCT(TYPE, UOP, MACRO,...)
Definition: uops_macros.h:19
FLAG
#define FLAG(STEREOMODETYPE, BOOL)
check
#define check(x, y, S, v)
Definition: motion_est_template.c:405
DECL_OPS_COMMON
#define DECL_OPS_COMMON(EXT, TYPE)
Definition: ops.c:311
SCALE
#define SCALE(c)
Definition: dcadata.c:7338
setup_clear
static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:265
RSHIFT
#define RSHIFT(a, b)
Definition: common.h:56
TYPE
#define TYPE
Definition: ffv1dec.c:96
compile
static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: ops.c:574
DECL_ENTRY
#define DECL_ENTRY(EXT, CHECK, SETUP, NAME,...)
Definition: ops.c:301
NULL
#define NULL
Definition: coverity.c:32
ADD
#define ADD(a, b)
Definition: dct32_template.c:123
U16
@ U16
Definition: sw_ops.c:42
MOVE
#define MOVE
Definition: rasc.c:45
ff_sws_setup_vec4
int ff_sws_setup_vec4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:200
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:130
SIZE
#define SIZE
Definition: bitstream_template.c:36
SwsOpEntry
Definition: ops_chain.h:122
IN
#define IN(x)
Definition: vp9dsp_template.c:1200
OUT
@ OUT
Definition: af_loudnorm.c:41
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:155
MIN
#define MIN(a, b)
Definition: qt-faststart.c:45
SETUP
@ SETUP
Definition: rtspcodes.h:133
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
setup_rw_packed
static int setup_rw_packed(const SwsImplParams *params, SwsImplResult *out)
Copyright (C) 2025-2026 Niklas Haas.
Definition: ops.c:31
CLEAR
#define CLEAR(destin)
Definition: wavpackenc.c:50
setup_scale
static int setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:251
_
#define _