Go to the documentation of this file.
39 #define AVUTIL_AVASSERT_H
41 #define AVUTIL_MACROS_H
43 #define av_assert0(cond) assert(cond)
44 #define av_malloc(s) malloc(s)
45 #define av_mallocz(s) calloc(1, s)
46 #define av_realloc(p, s) realloc(p, s)
47 #define av_strdup(s) strdup(s)
48 #define av_free(p) free(p)
49 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
50 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
54 void **pptr = (
void **) ptr;
66 const uint8_t *elem_data)
68 uint8_t *tab_elem_data =
NULL;
71 tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size;
73 memcpy(tab_elem_data, elem_data, elem_size);
115 buf_appendf(buf,
size,
"ff_sws");
119 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
122 buf_appendf(buf,
size,
"_neon");
179 #define LOOP_VH(s, mask, idx) if (s->use_vh) LOOP(mask, idx)
180 #define LOOP_MASK_VH(s, p, idx) if (s->use_vh) LOOP_MASK(p, idx)
181 #define LOOP_MASK_BWD_VH(s, p, idx) if (s->use_vh) LOOP_MASK_BWD(p, idx)
184 #define CMT(comment) rasm_annotate(r, comment)
185 #define CMTF(fmt, ...) rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)
217 return ((n + 1) >> 1) * 16;
231 i_str(
r, regs[0], sp_pre);
233 i_stp(
r, regs[0], regs[1], sp_pre);
234 for (
unsigned i = 2;
i + 1 < n;
i += 2)
252 i_ldr(
r, regs[0], sp_post);
256 for (
unsigned i = (n & ~1
u) - 2;
i >= 2;
i -= 2)
258 i_ldp(
r, regs[0], regs[1], sp_post);
264 #define MAX_SAVED_REGS 12
270 if (n >= 19 && n <= 30)
271 regs[(*count)++] = gpr;
300 snprintf(func_name,
sizeof(func_name),
"ff_sws_process_%04x_neon",
mask);
346 i_mov(
r,
s->bx,
s->bx_start);
CMT(
"bx = bx_start;");
350 i_mov(
r,
s->impl,
s->op1_impl);
CMT(
"impl = op1_impl;");
355 i_cmp(
r,
s->bx,
s->bx_end);
CMT(
"if (bx != bx_end)");
356 i_bne(
r, next_block);
CMT(
" goto next_block;");
361 i_bne(
r, next_row);
CMT(
" goto next_row;");
408 if (
p->block_size == 16) {
410 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 16 times>};");
411 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
413 i_dup (
r, vtmp.
b8, wtmp);
CMT(
"vtmp.lo = broadcast(tmp);");
414 i_ins (
r, vl[0].de[1], vtmp.
de[0]);
CMT(
"vl[0].hi = vtmp.lo;");
415 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
416 i_and (
r, vl[0].b16, vl[0].b16, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
419 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 8 times>, 0 <repeats 8 times>};");
420 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
421 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
422 i_and (
r, vl[0].b8, vl[0].b8, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
436 rasm_annotate_next(
r,
"v128 nibble_mask = {0xf <repeats 8 times>, 0x0 <repeats 8 times>};");
439 if (
p->block_size == 8) {
442 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
447 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
461 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
482 if (
p->mask == 0x0001) {
497 for (
int i = 0;
i < 4;
i++) {
503 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
536 if (
p->block_size == 8) {
537 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
538 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
541 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
542 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
543 i_ins (
r, vtmp1.
de[0], vl[0].
de[1]);
CMT(
"vtmp1.lo = vl[0].hi;");
544 i_addv(
r, vtmp1.
b, vtmp1.
b8);
CMT(
"vtmp1[0] = add_across(vtmp1);");
545 i_ins (
r, vtmp0.
be[1], vtmp1.
be[0]);
CMT(
"vtmp0[1] = vtmp1[0];");
557 for (
int i = 0;
i < 4;
i++)
562 if (
p->block_size == 8) {
586 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
589 case 0x108:
i_stp(
r, vl[0].d, vh[0].d,
a64op_post(
s->out[0],
s->vec_size * 2));
break;
590 case 0x110:
i_stp(
r, vl[0].q, vh[0].q,
a64op_post(
s->out[0],
s->vec_size * 2));
break;
607 if (
p->mask == 0x0001) {
622 for (
int i = 0;
i < 4;
i++) {
628 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
647 for (
int i = 0;
i < 4;
i++) {
653 case sizeof(uint16_t):
657 case sizeof(uint32_t):
668 #define SWIZZLE_TMP 0xf
673 snprintf(buf,
sizeof(
char[8]),
"vtmp%c", vh ?
'h' :
'l');
675 snprintf(buf,
sizeof(
char[8]),
"v%c[%u]", vh ?
'h' :
'l', n);
678 #define PRINT_SWIZZLE_V(n, vh) print_swizzle_v((char[8]){ 0 }, n, vh)
684 return vh ?
s->vh[n] :
s->vl[n];
702 uint8_t src_used[4] = { 0 };
703 bool done[4] = {
true,
true,
true,
true };
711 for (
bool progress =
true; progress; ) {
714 if (done[
dst] || src_used[
dst])
731 uint8_t cur_dst =
dst;
735 done[cur_dst] =
true;
741 done[cur_dst] =
true;
758 uint32_t mask_val[4] = { 0 };
759 uint8_t mask_idx[4] = { 0 };
773 for (
int j = 0; j < 4; j++) {
774 if (mask_val[j] ==
val) {
775 mask_val[
i] = mask_val[j];
776 mask_idx[
i] = mask_idx[j];
786 if (
val <= 0xff ||
val == 0xffff) {
790 i_dup (
r, vt[cur_vt], mask_gpr);
793 mask_idx[
i] = cur_vt++;
835 uint16_t offset_mask = 0;
849 i_orr (
r, vl[0], vl[0], vl[
i]);
CMTF(
"vl[0] |= vl[%u];",
i);
851 i_orr(
r, vh[0], vh[0], vh[
i]);
CMTF(
"vh[0] |= vh[%u];",
i);
925 for (
int i = 0;
i < 4;
i++) {
930 size_t src_el_size =
s->el_size;
944 if (
p->block_size == 8) {
945 if (src_el_size == 1 && dst_el_size > src_el_size) {
949 }
else if (src_el_size == 4 && dst_el_size < src_el_size) {
956 if (src_el_size == 2 && dst_el_size == 4) {
961 }
else if (src_el_size == 2 && dst_el_size == 1) {
967 if (src_el_size == 1 && dst_el_size == 2) {
971 }
else if (src_el_size == 2 && dst_el_size == 1) {
997 size_t src_el_size =
s->el_size;
999 size_t dst_total_size =
p->block_size * dst_el_size;
1000 size_t dst_vec_size =
FFMIN(dst_total_size, 16);
1003 s->use_vh = (dst_vec_size != dst_total_size);
1005 if (src_el_size == 1) {
1011 if (dst_el_size == 4) {
1104 int save_mask,
bool vh_pass)
1112 RasmOp *vx = vh_pass ?
s->vh :
s->vl;
1113 char cvh = vh_pass ?
'h' :
'l';
1115 if (vh_pass && !
s->use_vh)
1122 RasmOp src_vx[4] = { vx[0], vx[1], vx[2], vx[3] };
1124 for (
int i = 0;
i < 4;
i++) {
1141 for (
int j = 0; j < 5; j++) {
1146 RasmOp vsrc = src_vx[src_j];
1147 uint8_t vc_i = i_coeff / 4;
1148 uint8_t vc_j = i_coeff & 3;
1151 if (
first && is_offset) {
1152 i_dup (
r, vx[
i], vcoeff);
CMTF(
"v%c[%u] = broadcast(vc[%u][%u]);", cvh,
i, vc_i, vc_j);
1153 }
else if (
first && !is_offset) {
1157 i_fmul (
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] = vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1159 }
else if (!
p->linear.fmla) {
1169 i_fmul(
r, vtmp[vc_j], vsrc, vcoeff);
CMTF(
"vtmp[%u] = vsrc[%u] * vc[%u][%u];", vc_j, src_j, vc_i, vc_j);
1171 i_fadd(
r, vx[
i], vx[
i], vtmp[vc_j]);
CMTF(
"v%c[%u] += vtmp[%u];", cvh,
i, vc_j);
1173 i_fadd(
r, vx[
i], vx[
i], vsrc);
CMTF(
"v%c[%u] += vsrc[%u];", cvh,
i, vc_j);
1181 i_fmla(
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] += vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1199 switch (num_vregs) {
1200 case 1: coeff_veclist =
vv_1(vc[0]);
break;
1201 case 2: coeff_veclist =
vv_2(vc[0], vc[1]);
break;
1202 case 3: coeff_veclist =
vv_3(vc[0], vc[1], vc[2]);
break;
1203 case 4: coeff_veclist =
vv_4(vc[0], vc[1], vc[2], vc[3]);
break;
1210 uint16_t save_mask = 0;
1211 bool overwritten[4] = {
false,
false,
false,
false };
1213 for (
int j = 0; j < 5; j++) {
1218 if (!is_offset && overwritten[src_j])
1220 overwritten[
i] =
true;
1266 for (
int y_off = 0; y_off <= max_offset; y_off++) {
1268 if (
MASK_GET(
p->dither.y_offset,
i) == y_off)
1269 sorted[n_comps++] =
i;
1296 const int block_size_log2 = (
p->block_size == 16) ? 4 : 3;
1297 const int dither_size_log2 =
p->dither.size_log2;
1298 const int sizeof_float_log2 = 2;
1299 if (dither_size_log2 != block_size_log2) {
1300 RasmOp lsb =
IMM(block_size_log2 + sizeof_float_log2);
1302 i_ubfiz(
r, tmp1, bx64, lsb,
width);
CMT(
"tmp1 = (bx & ((dither_size / block_size) - 1)) * block_size * sizeof(float);");
1303 i_add (
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1306 int last_y_off = -1;
1308 for (
int sorted_i = 0; sorted_i < n_comps; sorted_i++) {
1309 int i = sorted[sorted_i];
1310 uint8_t y_off =
MASK_GET(
p->dither.y_offset,
i);
1311 bool do_load = (y_off != last_y_off);
1313 if (last_y_off < 0) {
1315 RasmOp lsb =
IMM(dither_size_log2 + sizeof_float_log2);
1322 i_ubfiz(
r, tmp1, y64, lsb,
width);
CMT(
"tmp1 = (y & (dither_size - 1)) * dither_size * sizeof(float);");
1325 i_ubfiz(
r, tmp1, tmp1, lsb,
width);
CMT(
"tmp1 = (tmp1 & (dither_size - 1)) * dither_size * sizeof(float);");
1327 i_add(
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1328 }
else if (do_load) {
1334 int delta = (y_off - last_y_off) * (1 << dither_size_log2) *
sizeof(
float);
1335 i_add(
r, ptr, ptr,
IMM(
delta));
CMTF(
"ptr += (y_off[%u] - y_off[%u]) * dither_size * sizeof(float);",
i, prev_i);
1344 i_fadd (
r, vl[
i], vl[
i], dither_vl);
CMTF(
"vl[%u] += vditherl;",
i);
1346 i_fadd(
r, vh[
i], vh[
i], dither_vh);
CMTF(
"vh[%u] += vditherh;",
i);
1359 bool is_read =
false;
1360 bool is_write =
false;
1378 char func_name[128];
1387 size_t total_size =
p->block_size * el_size;
1389 s->vec_size =
FFMIN(total_size, 16);
1390 s->use_vh = (
s->vec_size != total_size);
1392 s->el_size = el_size;
1393 s->el_count =
s->vec_size / el_size;
1434 i_ldr(
r,
s->cont, impl_post);
CMT(
"SwsFuncPtr cont = (impl++)->cont;");
1446 int prev_levels = 0;
1459 while (prev_fields[prev_levels])
1464 if (params && prev) {
1469 if (first_diff < 0) {
1470 int diff =
field->cmp_val((
void *) (((uintptr_t) params) +
field->offset),
1471 (
void *) (((uintptr_t) prev) +
field->offset));
1480 for (
int i = prev_levels - 1;
i > first_diff;
i--) {
1481 buf_appendf(&buf, &
size,
"%*sreturn NULL;\n", 4 * (
i + 1),
"");
1482 buf_appendf(&buf, &
size,
"%*s}\n", 4 *
i,
"");
1489 for (
int i = first_diff;
i < levels;
i++) {
1491 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
1492 buf_appendf(&buf, &
size,
"%*sif (%s%s == ", 4 * (
i + 1),
"", p_str,
field->name);
1494 buf_appendf(&buf, &
size,
")");
1495 if (
i == (levels - 1)) {
1496 buf_appendf(&buf, &
size,
" return ");
1498 buf_appendf(&buf, &
size,
";\n");
1500 buf_appendf(&buf, &
size,
" {\n");
1522 printf(
"#include \"libswscale/aarch64/ops_lookup.h\"\n");
1526 printf(
"extern void %s(void);\n", buf);
1531 printf(
"SwsFuncPtr ff_sws_aarch64_lookup(const SwsAArch64OpImplParams *p)\n");
1541 printf(
" return NULL;\n");
1640 while (params->
op) {
1649 printf(
"#include \"libavutil/aarch64/asm.S\"\n");
1665 _setmode(_fileno(stdout), _O_BINARY);
1668 for (
int i = 1;
i < argc;
i++) {
1669 if (!strcmp(argv[
i],
"-ops"))
1671 else if (!strcmp(argv[
i],
"-lookup"))
1675 fprintf(stderr,
"Exactly one of -ops or -lookup must be specified.\n");
static void error(const char *err)
static void asmgen_op_write_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_to_vx(int idx)
#define FF_DYNARRAY_ADD(av_size_max, av_elt_size, av_array, av_size, av_success, av_failure)
Add an element to a dynamic array.
#define LINEAR_MASK_GET(mask, idx, jdx)
RasmContext * rasm_alloc(void)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
__device__ int printf(const char *,...)
int main(int argc, char *argv[])
#define LOOP_MASK_BWD_VH(s, p, idx)
#define i_ld1(rctx, op0, op1)
#define LOOP_MASK_BWD(p, idx)
The following structure is used to describe one field from SwsAArch64OpImplParams.
static void reshape_all_vectors(SwsAArch64Context *s, int el_count, int el_size)
static RasmOp a64op_base(RasmOp op)
#define i_zip1(rctx, op0, op1, op2)
#define i_ld4(rctx, op0, op1)
#define i_mul(rctx, op0, op1, op2)
static RasmOp a64op_gpx(uint8_t n)
static void * av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
static void asmgen_op_read_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_w(RasmOp op)
static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)
void rasm_free(RasmContext **prctx)
RasmNode * rasm_set_current_node(RasmContext *rctx, RasmNode *node)
#define i_st4(rctx, op0, op1)
#define u(width, name, range_min, range_max)
static void asmgen_op_max(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
This helper structure is used to mimic the assembler syntax for vector register modifiers.
RasmNode * rasm_get_current_node(RasmContext *rctx)
static void asmgen_op_write_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_gpw(uint8_t n)
#define i_ld3(rctx, op0, op1)
static RasmOp vv_2(RasmOp op0, RasmOp op1)
static RasmOp vv_3(RasmOp op0, RasmOp op1, RasmOp op2)
#define PRINT_SWIZZLE_V(n, vh)
#define i_dup(rctx, op0, op1)
static void asmgen_op_dither(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
void int rasm_print(RasmContext *rctx, FILE *fp)
#define i_ld2(rctx, op0, op1)
#define i_fmla(rctx, op0, op1, op2)
static void asmgen_op_read_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_rev16(rctx, op0, op1)
static void asmgen_op_write_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static unsigned clobbered_gprs(const SwsAArch64Context *s, SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS])
#define i_fmin(rctx, op0, op1, op2)
#define LOOP_MASK_VH(s, p, idx)
void a64op_vec_views(RasmOp op, AArch64VecViews *out)
@ AARCH64_SWS_OP_READ_NIBBLE
@ AARCH64_SWS_OP_SWAP_BYTES
@ AARCH64_SWS_OP_READ_BIT
#define i_st2(rctx, op0, op1)
#define i_st3(rctx, op0, op1)
#define i_ushr(rctx, op0, op1, op2)
Runtime assembler for AArch64.
static void asmgen_op_read_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_addv(rctx, op0, op1)
static void asmgen_op_clear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp swizzle_a64op(SwsAArch64Context *s, uint8_t n, uint8_t vh)
static double val(void *priv, double ch)
RasmNode * rasm_add_label(RasmContext *rctx, int id)
#define i_fadd(rctx, op0, op1, op2)
static void asmgen_op_pack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ld1r(rctx, op0, op1)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static RasmOp vv_1(RasmOp op0)
static RasmOp a64op_elem(RasmOp op, uint8_t idx)
@ AARCH64_SWS_OP_WRITE_NIBBLE
uint16_t SwsAArch64OpMask
static const int offsets[]
static void impl_func_name(char **buf, size_t *size, const SwsAArch64OpImplParams *params)
static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_cont
static void linear_pass(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
Performs one pass of the linear transform over a single vector bank (low or high).
#define i_ins(rctx, op0, op1)
static void asmgen_op_convert(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this field
static RasmOp v_8b(RasmOp op)
#define i_ldr(rctx, op0, op1)
static RasmOp a64op_make_vec(uint8_t n, uint8_t el_count, uint8_t el_size)
static const ParamField * op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS]
static void asmgen_op_scale(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
the definition of that something depends on the semantic of the filter The callback must examine the status of the filter s links and proceed accordingly The status of output links is stored in the status_in and status_out fields and tested by the then the processing requires a frame on this link and the filter is expected to make efforts in that direction The status of input links is stored by the fifo and status_out fields
void aarch64_op_impl_func_name(char *buf, size_t size, const SwsAArch64OpImplParams *params)
static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)
#define i_cmp(rctx, op0, op1)
static void asmgen_op_read_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_out_bump
static size_t aarch64_pixel_size(SwsAArch64PixelType fmt)
static const SwsAArch64OpImplParams impl_params[]
Implementation parameters for all exported functions.
static void asmgen_op_unpack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_fmul(rctx, op0, op1, op2)
static void asmgen_op_write_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
static RasmOp a64op_post(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PACKED
#define i_umin(rctx, op0, op1, op2)
#define LOOP_VH(s, mask, idx)
#define offsetof_exec_out
#define i_add(rctx, op0, op1, op2)
static void asmgen_op_read_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask)
int rasm_new_label(RasmContext *rctx, const char *name)
Allocate a new label ID with the given name.
static RasmOp a64op_sp(void)
@ AARCH64_SWS_OP_WRITE_PLANAR
#define i_uxtl(rctx, op0, op1)
#define LOOP_MASK(p, idx)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define i(width, name, range_min, range_max)
#define i_ldrb(rctx, op0, op1)
#define i_shl(rctx, op0, op1, op2)
#define i_fmax(rctx, op0, op1, op2)
RasmNode * load_cont_node
#define i_zip2(rctx, op0, op1, op2)
#define i_fcvtzu(rctx, op0, op1)
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
static void asmgen_op_read_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
#define i_ucvtf(rctx, op0, op1)
@ AARCH64_SWS_OP_WRITE_BIT
static RasmOp a64op_off(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PLANAR
static void av_freep(void *ptr)
#define i_uxtl2(rctx, op0, op1)
static RasmOp vv_4(RasmOp op0, RasmOp op1, RasmOp op2, RasmOp op3)
#define i_lsr(rctx, op0, op1, op2)
void rasm_annotate_next(RasmContext *rctx, const char *comment)
static unsigned clobbered_frame_size(unsigned n)
#define i_ldp(rctx, op0, op1, op2)
static void asmgen_op_swap_bytes(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_priv
static void asmgen_set_load_cont_node(SwsAArch64Context *s)
Set node where the continuation address will be loaded and impl will be incremented.
static void asmgen_op_write_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_xtn(rctx, op0, op1)
static RasmOp a64op_pre(RasmOp op, int16_t imm)
static void asmgen_op_lshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
void rasm_annotate_nextf(RasmContext *rctx, char *s, size_t n, const char *fmt,...)
static RasmOp rasm_op_label(int id)
#define i_umax(rctx, op0, op1, op2)
#define offsetof_exec_in
These values will be used by ops_asmgen to access fields inside of SwsOpExec and SwsOpImpl.
#define MASK_SET(mask, idx, val)
int rasm_func_begin(RasmContext *rctx, const char *name, bool export, bool jumpable)
#define i_mov16b(rctx, op0, op1)
static int lookup_gen(void)
#define MASK_GET(mask, idx)
#define i_str(rctx, op0, op1)
static void swizzle_emit(SwsAArch64Context *s, uint8_t dst, uint8_t src)
#define i_and(rctx, op0, op1, op2)
#define i_ldrh(rctx, op0, op1)
static void asmgen_op_write_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_prologue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
static void asmgen_op_expand(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ubfiz(rctx, op0, op1, op2, op3)
static void asmgen_epilogue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
#define i_orr(rctx, op0, op1, op2)
@ AARCH64_SWS_OP_WRITE_PACKED
SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType operation.
#define i_rev32(rctx, op0, op1)
#define i_stp(rctx, op0, op1, op2)
static void asmgen_op_min(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_movi(rctx, op0, op1)
static RasmOp a64op_x(RasmOp op)
static void asmgen_op_linear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_in_bump
static void asmgen_op_rshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_is_offset(int idx)
static int linear_num_vregs(const SwsAArch64OpImplParams *params)
static const char * print_swizzle_v(char buf[8], uint8_t n, uint8_t vh)
static void asmgen_op_swizzle(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ushl(rctx, op0, op1, op2)
RasmNode * rasm_add_comment(RasmContext *rctx, const char *comment)
static RasmOp a64op_lr(void)
static uint8_t a64op_gpr_n(RasmOp op)
#define i_mov(rctx, op0, op1)
static RasmOp v_q(RasmOp op)