FFmpeg
Data Structures | Macros | Functions | Variables
ops_asmgen.c File Reference
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libavutil/dynarray.h"
#include "rasm.c"
#include "rasm_print.c"
#include "ops_impl.c"
#include "ops_entries.c"

Go to the source code of this file.

Data Structures

struct  SwsAArch64Context
 

Macros

#define AVUTIL_AVASSERT_H
 This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries. More...
 
#define AVUTIL_LOG_H
 
#define AVUTIL_MACROS_H
 
#define AVUTIL_MEM_H
 
#define av_assert0(cond)   assert(cond)
 
#define av_malloc(s)   malloc(s)
 
#define av_mallocz(s)   calloc(1, s)
 
#define av_realloc(p, s)   realloc(p, s)
 
#define av_strdup(s)   strdup(s)
 
#define av_free(p)   free(p)
 
#define FFMAX(a, b)   ((a) > (b) ? (a) : (b))
 
#define FFMIN(a, b)   ((a) > (b) ? (b) : (a))
 
#define LOOP_VH(s, mask, idx)   if (s->use_vh) LOOP(mask, idx)
 
#define LOOP_MASK_VH(s, p, idx)   if (s->use_vh) LOOP_MASK(p, idx)
 
#define LOOP_MASK_BWD_VH(s, p, idx)   if (s->use_vh) LOOP_MASK_BWD(p, idx)
 
#define CMT(comment)   rasm_annotate(r, comment)
 
#define CMTF(fmt, ...)   rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)
 
#define MAX_SAVED_REGS   12
 
#define SWIZZLE_TMP   0xf
 
#define PRINT_SWIZZLE_V(n, vh)   print_swizzle_v((char[8]){ 0 }, n, vh)
 

Functions

static void av_freep (void *ptr)
 
static void * av_dynarray2_add (void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
 
static size_t aarch64_pixel_size (SwsAArch64PixelType fmt)
 
static void impl_func_name (char **buf, size_t *size, const SwsAArch64OpImplParams *params)
 
void aarch64_op_impl_func_name (char *buf, size_t size, const SwsAArch64OpImplParams *params)
 
static void reshape_all_vectors (SwsAArch64Context *s, int el_count, int el_size)
 
static unsigned clobbered_frame_size (unsigned n)
 
static void asmgen_prologue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)
 
static void asmgen_epilogue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)
 
static void clobber_gpr (RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)
 
static unsigned clobbered_gprs (const SwsAArch64Context *s, SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS])
 
static void asmgen_process (SwsAArch64Context *s, SwsAArch64OpMask mask)
 
static void asmgen_set_load_cont_node (SwsAArch64Context *s)
 Set node where the continuation address will be loaded and impl will be incremented. More...
 
static void asmgen_op_read_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_packed_1 (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
 
static void asmgen_op_read_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_packed_1 (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
 
static void asmgen_op_write_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_swap_bytes (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static const char * print_swizzle_v (char buf[8], uint8_t n, uint8_t vh)
 
static RasmOp swizzle_a64op (SwsAArch64Context *s, uint8_t n, uint8_t vh)
 
static void swizzle_emit (SwsAArch64Context *s, uint8_t dst, uint8_t src)
 
static void asmgen_op_swizzle (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_unpack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_pack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_lshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_rshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_clear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_convert (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_expand (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_min (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_max (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_scale (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void linear_pass (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
 Performs one pass of the linear transform over a single vector bank (low or high). More...
 
static void asmgen_op_linear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_dither (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_cps (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void aarch64_op_impl_lookup_str (char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)
 
static int lookup_gen (void)
 
static int asmgen (void)
 
int main (int argc, char *argv[])
 

Variables

static const SwsAArch64OpImplParams impl_params []
 Implementation parameters for all exported functions. More...
 

Macro Definition Documentation

◆ AVUTIL_AVASSERT_H

#define AVUTIL_AVASSERT_H

This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries.

The necessary utils are redefined below using standard C equivalents.

Definition at line 39 of file ops_asmgen.c.

◆ AVUTIL_LOG_H

#define AVUTIL_LOG_H

Definition at line 40 of file ops_asmgen.c.

◆ AVUTIL_MACROS_H

#define AVUTIL_MACROS_H

Definition at line 41 of file ops_asmgen.c.

◆ AVUTIL_MEM_H

#define AVUTIL_MEM_H

Definition at line 42 of file ops_asmgen.c.

◆ av_assert0

#define av_assert0 (   cond)    assert(cond)

Definition at line 43 of file ops_asmgen.c.

◆ av_malloc

#define av_malloc (   s)    malloc(s)
Examples
avio_read_callback.c, hw_decode.c, and qsv_transcode.c.

Definition at line 44 of file ops_asmgen.c.

◆ av_mallocz

#define av_mallocz (   s)    calloc(1, s)

Definition at line 45 of file ops_asmgen.c.

◆ av_realloc

#define av_realloc (   p,
  s 
)    realloc(p, s)

Definition at line 46 of file ops_asmgen.c.

◆ av_strdup

#define av_strdup (   s)    strdup(s)

◆ av_free

#define av_free (   p)    free(p)

Definition at line 48 of file ops_asmgen.c.

◆ FFMAX

#define FFMAX (   a,
  b 
)    ((a) > (b) ? (a) : (b))

Definition at line 49 of file ops_asmgen.c.

◆ FFMIN

#define FFMIN (   a,
  b 
)    ((a) > (b) ? (b) : (a))

Definition at line 50 of file ops_asmgen.c.

◆ LOOP_VH

#define LOOP_VH (   s,
  mask,
  idx 
)    if (s->use_vh) LOOP(mask, idx)

Definition at line 179 of file ops_asmgen.c.

◆ LOOP_MASK_VH

#define LOOP_MASK_VH (   s,
  p,
  idx 
)    if (s->use_vh) LOOP_MASK(p, idx)

Definition at line 180 of file ops_asmgen.c.

◆ LOOP_MASK_BWD_VH

#define LOOP_MASK_BWD_VH (   s,
  p,
  idx 
)    if (s->use_vh) LOOP_MASK_BWD(p, idx)

Definition at line 181 of file ops_asmgen.c.

◆ CMT

#define CMT (   comment)    rasm_annotate(r, comment)

Definition at line 184 of file ops_asmgen.c.

◆ CMTF

#define CMTF (   fmt,
  ... 
)    rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)

Definition at line 185 of file ops_asmgen.c.

◆ MAX_SAVED_REGS

#define MAX_SAVED_REGS   12

Definition at line 264 of file ops_asmgen.c.

◆ SWIZZLE_TMP

#define SWIZZLE_TMP   0xf

Definition at line 668 of file ops_asmgen.c.

◆ PRINT_SWIZZLE_V

#define PRINT_SWIZZLE_V (   n,
  vh 
)    print_swizzle_v((char[8]){ 0 }, n, vh)

Definition at line 678 of file ops_asmgen.c.

Function Documentation

◆ av_freep()

static void av_freep ( void *  ptr)
static

Definition at line 52 of file ops_asmgen.c.

Referenced by av_dynarray2_add().

◆ av_dynarray2_add()

static void* av_dynarray2_add ( void **  tab_ptr,
int *  nb_ptr,
size_t  elem_size,
const uint8_t *  elem_data 
)
static

Definition at line 65 of file ops_asmgen.c.

◆ aarch64_pixel_size()

static size_t aarch64_pixel_size ( SwsAArch64PixelType  fmt)
static

◆ impl_func_name()

static void impl_func_name ( char **  buf,
size_t *  size,
const SwsAArch64OpImplParams params 
)
static

Definition at line 113 of file ops_asmgen.c.

Referenced by aarch64_op_impl_func_name(), and aarch64_op_impl_lookup_str().

◆ aarch64_op_impl_func_name()

void aarch64_op_impl_func_name ( char *  buf,
size_t  size,
const SwsAArch64OpImplParams params 
)

Definition at line 125 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), and lookup_gen().

◆ reshape_all_vectors()

static void reshape_all_vectors ( SwsAArch64Context s,
int  el_count,
int  el_size 
)
static

Definition at line 188 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), asmgen_op_expand(), asmgen_op_pack(), and asmgen_op_unpack().

◆ clobbered_frame_size()

static unsigned clobbered_frame_size ( unsigned  n)
static

Definition at line 215 of file ops_asmgen.c.

Referenced by asmgen_epilogue(), and asmgen_prologue().

◆ asmgen_prologue()

static void asmgen_prologue ( SwsAArch64Context s,
const RasmOp regs,
unsigned  n 
)
static

Definition at line 220 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ asmgen_epilogue()

static void asmgen_epilogue ( SwsAArch64Context s,
const RasmOp regs,
unsigned  n 
)
static

Definition at line 241 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ clobber_gpr()

static void clobber_gpr ( RasmOp  regs[MAX_SAVED_REGS],
unsigned *  count,
RasmOp  gpr 
)
static

Definition at line 266 of file ops_asmgen.c.

Referenced by clobbered_gprs().

◆ clobbered_gprs()

static unsigned clobbered_gprs ( const SwsAArch64Context s,
SwsAArch64OpMask  mask,
RasmOp  regs[MAX_SAVED_REGS] 
)
static

Definition at line 274 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ asmgen_process()

static void asmgen_process ( SwsAArch64Context s,
SwsAArch64OpMask  mask 
)
static

The process function for aarch64 works similarly to the x86 backend. The description in x86/ops_include.asm mostly holds here as well.

Definition at line 289 of file ops_asmgen.c.

Referenced by asmgen().

◆ asmgen_set_load_cont_node()

static void asmgen_set_load_cont_node ( SwsAArch64Context s)
static

Set node where the continuation address will be loaded and impl will be incremented.

This should be done right after impl->priv has been used.

Definition at line 376 of file ops_asmgen.c.

Referenced by asmgen_op_clear(), asmgen_op_cps(), asmgen_op_dither(), asmgen_op_linear(), asmgen_op_max(), asmgen_op_min(), asmgen_op_read_bit(), asmgen_op_scale(), and asmgen_op_write_bit().

◆ asmgen_op_read_bit()

static void asmgen_op_read_bit ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 389 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_nibble()

static void asmgen_op_read_nibble ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 426 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_packed_1()

static void asmgen_op_read_packed_1 ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 452 of file ops_asmgen.c.

Referenced by asmgen_op_read_packed().

◆ asmgen_op_read_packed_n()

static void asmgen_op_read_packed_n ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vx 
)
static

Definition at line 469 of file ops_asmgen.c.

Referenced by asmgen_op_read_packed().

◆ asmgen_op_read_packed()

static void asmgen_op_read_packed ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 480 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_planar()

static void asmgen_op_read_planar ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 491 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_bit()

static void asmgen_op_write_bit ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 519 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_nibble()

static void asmgen_op_write_nibble ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 550 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_packed_1()

static void asmgen_op_write_packed_1 ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 577 of file ops_asmgen.c.

Referenced by asmgen_op_write_packed().

◆ asmgen_op_write_packed_n()

static void asmgen_op_write_packed_n ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vx 
)
static

Definition at line 594 of file ops_asmgen.c.

Referenced by asmgen_op_write_packed().

◆ asmgen_op_write_packed()

static void asmgen_op_write_packed ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 605 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_planar()

static void asmgen_op_write_planar ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 616 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_swap_bytes()

static void asmgen_op_swap_bytes ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 641 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ print_swizzle_v()

static const char* print_swizzle_v ( char  buf[8],
uint8_t  n,
uint8_t  vh 
)
static

Definition at line 670 of file ops_asmgen.c.

◆ swizzle_a64op()

static RasmOp swizzle_a64op ( SwsAArch64Context s,
uint8_t  n,
uint8_t  vh 
)
static

Definition at line 680 of file ops_asmgen.c.

Referenced by swizzle_emit().

◆ swizzle_emit()

static void swizzle_emit ( SwsAArch64Context s,
uint8_t  dst,
uint8_t  src 
)
static

Definition at line 687 of file ops_asmgen.c.

Referenced by asmgen_op_swizzle().

◆ asmgen_op_swizzle()

static void asmgen_op_swizzle ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 699 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_unpack()

static void asmgen_op_unpack ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

All-one values in movi only work up to 8-bit, and then at full 16- or 32-bit, but not for intermediate values like 10-bit. In those cases, we use mov + dup instead.

Definition at line 751 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_pack()

static void asmgen_op_pack ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 823 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_lshift()

static void asmgen_op_lshift ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 861 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_rshift()

static void asmgen_op_rshift ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 875 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_clear()

static void asmgen_op_clear ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

TODO

  • pack elements in impl->priv and perform smaller loads
  • if only 1 element and not vh, load directly with ld1r

Definition at line 889 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_convert()

static void asmgen_op_convert ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Since each instruction in the convert operation needs specific element types, it is simpler to use arrangement specifiers for each operand instead of reshaping all vectors.

This function assumes block_size is either 8 or 16, and that we're always using the maximum number of vector registers possible. Therefore, u32 always uses the high vector bank.

Definition at line 913 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_expand()

static void asmgen_op_expand ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 991 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_min()

static void asmgen_op_min ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1023 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_max()

static void asmgen_op_max ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1048 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_scale()

static void asmgen_op_scale ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1073 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ linear_pass()

static void linear_pass ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vt,
RasmOp vc,
int  save_mask,
bool  vh_pass 
)
static

Performs one pass of the linear transform over a single vector bank (low or high).

The intermediate registers for fmul+fadd (for when SWS_BITEXACT is set) start from temp vector 4.

Save rows that need to be used as input after they have already been written to.

The non-zero coefficients have been packed in aarch64_setup_linear() in sequential order into the individual lanes of the coefficient vector registers. We must follow the same order of execution here.

Split the multiply-accumulate into fmul+fadd. All multiplications are performed first into temporary registers, and only then added to the destination, to reduce the dependency chain. There is no need to perform multiplications by 1.

Most modern aarch64 cores have a fastpath for sequences of fmla instructions. This means that even if the coefficient is 1, it is still faster to use fmla by 1 instead of fadd.

Definition at line 1102 of file ops_asmgen.c.

Referenced by asmgen_op_linear().

◆ asmgen_op_linear()

static void asmgen_op_linear ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1188 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_dither()

static void asmgen_op_dither ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

For a description of the matrix buffer layout, read the comments in aarch64_setup_dither() in aarch64/ops.c.

Sort components by y_offset value so that we can start dithering with the smallest value, and increment the pointer upwards for each new offset. The dither matrix is over-allocated and may be over-read at the top, but it cannot be over-read before the start of the buffer. Since we only mask the y offset once, this would be an issue if we tried to subtract a value larger than the initial y_offset.

We use ubfiz to mask and shift left in a single instruction:

    ubfiz <Wd>, <Wn>, #<lsb>, #<width>
    Wd = (Wn & ((1 << width) - 1)) << lsb;

Given:

    block_size    = 8,  log2(block_size)    = 3
    dither_size   = 16, log2(dither_size)   = 4, dither_mask = 0b1111
    sizeof(float) = 4,  log2(sizeof(float)) = 2

Suppose we have bx = 0bvvvv. To get x, we left shift by log2(block_size) and end up with 0bvvvv000. Then we mask against dither_mask, and end up with 0bv000. Finally we multiply by sizeof(float), which is the same as shifting left by log2(sizeof(float)). The result is 0bv00000.

Therefore:

    width = log2(dither_size) - log2(block_size)
    lsb   = log2(block_size) + log2(sizeof(float))

The ubfiz instruction for the y offset performs masking by the dither matrix size and shifts by the stride.

On subsequent runs, just increment the pointer. The matrix is over-allocated, so we don't risk overreading.

Definition at line 1233 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_cps()

static void asmgen_op_cps ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Set up vector register dimensions and reshape all vectors accordingly.

Definition at line 1355 of file ops_asmgen.c.

Referenced by asmgen().

◆ aarch64_op_impl_lookup_str()

static void aarch64_op_impl_lookup_str ( char *  buf,
size_t  size,
const SwsAArch64OpImplParams params,
const SwsAArch64OpImplParams prev,
const char *  p_str 
)
static

Definition at line 1442 of file ops_asmgen.c.

Referenced by lookup_gen().

◆ lookup_gen()

static int lookup_gen ( void  )
static

The lookup function matches the SwsAArch64OpImplParams from ops_entries.c to the exported functions generated by asmgen_op(). Each call to aarch64_op_impl_lookup_str() generates a code fragment to uniquely detect the current function, opening and/or closing conditions depending on the parameters of the previous function.

Definition at line 1508 of file ops_asmgen.c.

Referenced by main().

◆ asmgen()

static int asmgen ( void  )
static

The entry point of the SwsOpFunc is the process function. The first kernel function is called from process, and subsequent kernel functions are chained by directly branching to the next operation, using a continuation-passing style design. The last operation must be a write operation, which returns from the call to the process function.

The GPRs used by the entire call-chain are listed below.

Function arguments are passed in r0-r5. After the parameters from exec have been read, r0 is reused to branch to the continuation functions. After the original parameters from impl have been computed, r1 is reused as the impl pointer for each operation.

Loop iterators are r6 for bx and r3 for y, reused from y_start, which doesn't need to be preserved.

The intra-procedure-call temporary registers (r16 and r17) are used as scratch registers. They may be used by call veneers and PLT code inserted by the linker, so we cannot expect them to persist across branches between functions.

The Platform Register (r18) is not used.

The read/write data pointers and padding values first use up the remaining free caller-saved registers, and only then are the callee-saved registers (r19-r28) used.

The Link Register (r30) is used when calling the first kernel, so it must be saved.

Definition at line 1550 of file ops_asmgen.c.

Referenced by main().

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1659 of file ops_asmgen.c.

Variable Documentation

◆ impl_params

const SwsAArch64OpImplParams impl_params[]
static
Initial value:
= {
}

Implementation parameters for all exported functions.

This list is compiled by performing a dummy run of all conversions in sws_ops and collecting all functions that need to be generated. This is achieved by running: make fate-sws-ops-entries-aarch64 GEN=1

Definition at line 93 of file ops_asmgen.c.

Referenced by asmgen(), and lookup_gen().

AARCH64_SWS_OP_NONE
@ AARCH64_SWS_OP_NONE
Definition: ops_impl.h:39