/* One pass of the 8-point 1-D IDCT: a four-stage butterfly applied to all
 * eight lanes of vx0..vx7 at once, leaving the results in vy0..vy7. */
#define IDCT_HALF \
    /* 1st stage */ \
    t1 = vec_mradds(a1, vx7, vx1); \
    t8 = vec_mradds(a1, vx1, vec_subs(zero, vx7)); \
    t7 = vec_mradds(a2, vx5, vx3); \
    t3 = vec_mradds(ma2, vx3, vx5); \
    /* 2nd stage */ \
    t5 = vec_adds(vx0, vx4); \
    t0 = vec_subs(vx0, vx4); \
    t2 = vec_mradds(a0, vx6, vx2); \
    t4 = vec_mradds(a0, vx2, vec_subs(zero, vx6)); \
    t6 = vec_adds(t8, t3); \
    t3 = vec_subs(t8, t3); \
    t8 = vec_subs(t1, t7); \
    t1 = vec_adds(t1, t7); \
    /* 3rd stage */ \
    t7 = vec_adds(t5, t2); \
    t2 = vec_subs(t5, t2); \
    t5 = vec_adds(t0, t4); \
    t0 = vec_subs(t0, t4); \
    t4 = vec_subs(t8, t3); \
    t3 = vec_adds(t8, t3); \
    /* 4th stage */ \
    vy0 = vec_adds(t7, t1); \
    vy7 = vec_subs(t7, t1); \
    vy1 = vec_mradds(c4, t3, t5); \
    vy6 = vec_mradds(mc4, t3, t5); \
    vy2 = vec_mradds(c4, t4, t0); \
    vy5 = vec_mradds(mc4, t4, t0); \
    vy3 = vec_adds(t2, t6); \
    vy4 = vec_subs(t2, t6)

/* Full 2-D 8x8 IDCT on block[0..7]: prescale the coefficients, run the
 * first 1-D pass, transpose, run the second 1-D pass and scale down. */
#define IDCT \
    vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
    vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
    vec_s16 c4   = vec_splat(constants[0], 0); \
    vec_s16 a0   = vec_splat(constants[0], 1); \
    vec_s16 a1   = vec_splat(constants[0], 2); \
    vec_s16 a2   = vec_splat(constants[0], 3); \
    vec_s16 mc4  = vec_splat(constants[0], 4); \
    vec_s16 ma2  = vec_splat(constants[0], 5); \
    vec_s16 bias = (vec_s16) vec_splat((vec_s32) constants[0], 3); \
    vec_s16 zero  = vec_splat_s16(0); \
    vec_u16 shift = vec_splat_u16(4); \
    /* prescale: shift the coefficients up by 4, multiply by the per-row scale factors */ \
    vec_s16 vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero); \
    vec_s16 vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero); \
    vec_s16 vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero); \
    vec_s16 vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero); \
    vec_s16 vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero); \
    vec_s16 vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero); \
    vec_s16 vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero); \
    vec_s16 vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero); \
    /* first 1-D pass */ \
    IDCT_HALF; \
    /* transpose (three merge passes); the rounding bias is folded into vx0 */ \
    vx0 = vec_mergeh(vy0, vy4); \
    vx1 = vec_mergel(vy0, vy4); \
    vx2 = vec_mergeh(vy1, vy5); \
    vx3 = vec_mergel(vy1, vy5); \
    vx4 = vec_mergeh(vy2, vy6); \
    vx5 = vec_mergel(vy2, vy6); \
    vx6 = vec_mergeh(vy3, vy7); \
    vx7 = vec_mergel(vy3, vy7); \
    vy0 = vec_mergeh(vx0, vx4); \
    vy1 = vec_mergel(vx0, vx4); \
    vy2 = vec_mergeh(vx1, vx5); \
    vy3 = vec_mergel(vx1, vx5); \
    vy4 = vec_mergeh(vx2, vx6); \
    vy5 = vec_mergel(vx2, vx6); \
    vy6 = vec_mergeh(vx3, vx7); \
    vy7 = vec_mergel(vx3, vx7); \
    vx0 = vec_adds(vec_mergeh(vy0, vy4), bias); \
    vx1 = vec_mergel(vy0, vy4); \
    vx2 = vec_mergeh(vy1, vy5); \
    vx3 = vec_mergel(vy1, vy5); \
    vx4 = vec_mergeh(vy2, vy6); \
    vx5 = vec_mergel(vy2, vy6); \
    vx6 = vec_mergeh(vy3, vy7); \
    vx7 = vec_mergel(vy3, vy7); \
    /* second 1-D pass */ \
    IDCT_HALF; \
    /* scale the result down */ \
    shift = vec_splat_u16(6); \
    vx0 = vec_sra(vy0, shift); \
    vx1 = vec_sra(vy1, shift); \
    vx2 = vec_sra(vy2, shift); \
    vx3 = vec_sra(vy3, shift); \
    vx4 = vec_sra(vy4, shift); \
    vx5 = vec_sra(vy5, shift); \
    vx6 = vec_sra(vy6, shift); \
    vx7 = vec_sra(vy7, shift)

/* Row 0 holds the butterfly coefficients and the rounding bias; rows 1-4
 * hold the prescale factors for the eight input rows. */
static const vec_s16 constants[5] = {
    { 23170, 13573,  6518, 21895, -23170, -21895,    32,    31 },
    { 16384, 22725, 21407, 19266,  16384,  19266, 21407, 22725 },
    { 22725, 31521, 29692, 26722,  22725,  26722, 29692, 31521 },
    { 21407, 29692, 27969, 25172,  21407,  25172, 27969, 29692 },
    { 19266, 26722, 25172, 22654,  19266,  22654, 25172, 26722 }
};
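/*
 * Illustrative sketch, not part of the AltiVec code above: a scalar model of
 * what vec_mradds() (vmhraddshs) computes per 16-bit lane, which is the
 * fixed-point multiply-accumulate the macros are built on.  Assumed
 * semantics: round the signed 16x16 product at bit 14, keep the high 17
 * bits, add the third operand and saturate to s16.  The helper name
 * mradds_scalar is made up for illustration.
 */
static int16_t mradds_scalar(int16_t a, int16_t b, int16_t c)
{
    int32_t v = ((int32_t) a * b + 0x4000) >> 15;   /* Q15 multiply with rounding */

    v += c;                                         /* accumulate */
    if (v >  32767)                                 /* saturate to the int16_t range */
        v =  32767;
    if (v < -32768)
        v = -32768;
    return v;
}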
/* In-place 2-D IDCT of an 8x8 block of 16-bit coefficients. */
static void idct_altivec(int16_t *blk)
{
    vec_s16 *block = (vec_s16 *) blk;

    IDCT;

    /* write the transformed coefficients back in place */
    block[0] = vx0;
    block[1] = vx1;
    block[2] = vx2;
    block[3] = vx3;
    block[4] = vx4;
    block[5] = vx5;
    block[6] = vx6;
    block[7] = vx7;
}
/* Store one row of results for the put path: saturate the eight samples to
 * unsigned 8-bit and write them to dest as two 32-bit elements. */
#define COPY(dest, src) \
    tmp = vec_packsu(src, src); \
    vec_ste((vec_u32) tmp, 0, (unsigned int *) dest); \
    vec_ste((vec_u32) tmp, 4, (unsigned int *) dest)

#if HAVE_BIGENDIAN
    /* add path: build permute masks that zero-extend eight destination
     * pixels to 16 bits, for the rows at dest and at dest + stride */
    p0    = vec_lvsl(0, dest);
    p1    = vec_lvsl(stride, dest);
    p     = vec_splat_u8(-1);
    perm0 = vec_mergeh(p, p0);
    perm1 = vec_mergeh(p, p1);
#endif
/* Load eight destination pixels and widen them to 16 bits. */
#if HAVE_BIGENDIAN
#define GET_TMP2(dest, prm) \
    tmp  = vec_ld(0, dest); \
    tmp2 = (vec_s16) vec_perm(tmp, (vec_u8) zero, prm);
#else
#define GET_TMP2(dest, prm) \
    tmp  = vec_vsx_ld(0, dest); \
    tmp2 = (vec_s16) vec_mergeh(tmp, (vec_u8) zero)
#endif

/* Add one row of IDCT output to the destination pixels, saturate to
 * unsigned 8-bit and store the row back. */
#define ADD(dest, src, perm) \
    GET_TMP2(dest, perm); \
    tmp3 = vec_adds(tmp2, src); \
    tmp  = vec_packsu(tmp3, tmp3); \
    vec_ste((vec_u32) tmp, 0, (unsigned int *) dest); \
    vec_ste((vec_u32) tmp, 4, (unsigned int *) dest)

    ADD(dest, vx0, perm0);
    dest += stride;
    ADD(dest, vx1, perm1);
    dest += stride;
    ADD(dest, vx2, perm0);
    dest += stride;
    ADD(dest, vx3, perm1);
    dest += stride;
    ADD(dest, vx4, perm0);
    dest += stride;
    ADD(dest, vx5, perm1);
    dest += stride;
    ADD(dest, vx6, perm0);
    dest += stride;
    ADD(dest, vx7, perm1);
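/*
 * Illustrative sketch, not part of the AltiVec code above: a scalar model of
 * what the ADD sequence accomplishes, assuming res already holds the 8x8
 * IDCT result in row-major order (block -> idct -> add dest -> clip to
 * unsigned 8-bit -> dest).  The helper name add_block_scalar_model is made
 * up for illustration.
 */
static void add_block_scalar_model(uint8_t *dest, ptrdiff_t stride,
                                   const int16_t *res)
{
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++) {
            int v = dest[j] + res[8 * i + j];   /* add the residual to the picture */

            if (v < 0)                          /* clip to the unsigned 8-bit range */
                v = 0;
            if (v > 255)
                v = 255;
            dest[j] = v;
        }
        dest += stride;                         /* advance to the next output row */
    }
}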
/* Install the AltiVec IDCT for 8-bit, full-resolution decoding. */
av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    if (!high_bit_depth && avctx->lowres == 0) {
        c->idct = idct_altivec;
    }
}
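/*
 * Illustrative sketch, not part of the original code: how a caller might end
 * up using the pointer installed above, assuming c was zero-initialized.
 * Calling the PPC init directly and the DC-only test block are made up for
 * illustration; real decoders go through the generic IDCTDSP init, which
 * dispatches to this one.  DECLARE_ALIGNED provides the 16-byte alignment
 * the vector loads expect.
 */
static void idct_usage_sketch(IDCTDSPContext *c, AVCodecContext *avctx)
{
    DECLARE_ALIGNED(16, int16_t, block)[64] = { 0 };

    block[0] = 64;                     /* a DC-only 8x8 block, for illustration */

    ff_idctdsp_init_ppc(c, avctx, 0);  /* 8-bit depth, so the AltiVec path may be picked */
    if (c->idct)
        c->idct(block);                /* in-place inverse transform of the block */
}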
Undefined behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs, and optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of the computation. The signed integer overflow problem shows up in speed-critical code: code which is highly optimized and works with signed integers can overflow in intermediate computations.
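A common way to sidestep this, sketched below rather than quoted from FFmpeg, is to do the intermediate arithmetic in unsigned types, where wraparound is well defined, and only convert back to signed at the end (the idea behind helpers such as FFmpeg's SUINT). The helper name mac16_wraparound and its parameters are made up for illustration.

static int32_t mac16_wraparound(int16_t a, int16_t b, int32_t acc)
{
    /* Unsigned arithmetic wraps modulo 2^32 instead of being undefined, and
     * two's-complement wraparound yields the same low 32 bits as the
     * mathematical signed result. */
    uint32_t sum = (uint32_t) acc + (uint32_t) a * (uint32_t) b;

    /* Converting the wrapped value back to int32_t is implementation-defined
     * rather than undefined, and gives the expected value on the
     * two's-complement targets FFmpeg supports. */
    return (int32_t) sum;
}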