/*
 * NOTE(review): this listing is fragmentary. The numbers embedded in the
 * text are line numbers of the original file and several of those lines are
 * absent, so neither the macros nor the function below are complete as shown.
 *
 * The macros on the next line wrap AltiVec intrinsics (vec_ld, vec_perm,
 * vec_lvsl, vec_mule/vec_mulo, vec_mergeh/vec_mergel, vec_add).
 * yuv2planeX_8 fetches a permuted source vector `ls` through GET_LS,
 * multiplies it by `filter` with vec_mule and vec_mulo, merges the two
 * product vectors and adds the merged halves into d1 and d2.
 *
 * NOTE(review): LOAD_SRCV8 declares a parameter named v1, yet its vec_perm
 * line reads `src_v1`. As far as the visible text shows, the macro therefore
 * depends on a variable of that name at the expansion site -- confirm
 * against the full file.
 */
36 #define vzero vec_splat_s32(0) 38 #define GET_LS(a,b,c,s) {\ 39 vector signed short l2 = vec_ld(((b) << 1) + 16, s);\ 40 ls = vec_perm(a, l2, c);\ 44 #define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {\ 45 vector signed short ls;\ 46 vector signed int vf1, vf2, i1, i2;\ 47 GET_LS(l1, x, perm, src);\ 48 i1 = vec_mule(filter, ls);\ 49 i2 = vec_mulo(filter, ls);\ 50 vf1 = vec_mergeh(i1, i2);\ 51 vf2 = vec_mergel(i1, i2);\ 52 d1 = vec_add(d1, vf1);\ 53 d2 = vec_add(d2, vf2);\ 56 #define LOAD_FILTER(vf,f) {\ 57 vector unsigned char perm0 = vec_lvsl(joffset, f);\ 58 vf = vec_ld(joffset, f);\ 59 vf = vec_perm(vf, vf, perm0);\ 61 #define LOAD_L1(ll1,s,p){\ 62 p = vec_lvsl(xoffset, s);\ 63 ll1 = vec_ld(xoffset, s);\ 72 #define GET_VF4(a, vf, f) {\ 73 vf = vec_ld(a<< 3, f);\ 75 vf = vec_mergel(vf, (vector signed short)vzero);\ 77 vf = vec_mergeh(vf, (vector signed short)vzero);\ 79 #define FIRST_LOAD(sv, pos, s, per) {\ 81 per = vec_lvsl(pos, s);\ 83 #define UPDATE_PTR(s0, d0, s1, d1) {\ 87 #define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\ 88 v1 = vec_ld(pos + a + 16, s);\ 89 vf = vec_perm(v0, v1, per);\ 91 #define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\ 92 if ((((uintptr_t)s + pos) % 16) > 8) {\ 93 v1 = vec_ld(pos + a + 16, s);\ 95 vf = vec_perm(v0, src_v1, per);\ 97 #define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\ 98 vf1 = vec_ld((a * 2 * filterSize) + (b * 2) + 16 + off, f);\ 99 vf = vec_perm(vf0, vf1, per);\ 102 #define FUNC(name) name ## _altivec 110 #define output_pixel(pos, val, bias, signedness) \ 112 AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ 114 AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ 118 yuv2plane1_float_u(
const int32_t *
src,
float *dest,
int dstW,
int start)
/*
 * Scalar conversion of the samples src[start] .. src[dstW - 1] into floats
 * stored at the same indices of dest.
 *
 * Every sample first has 1 << (shift - 1), i.e. half of 1 << shift, added
 * to it; the stored value is val_uint scaled by 1/65535.
 *
 * NOTE(review): the statement that derives val_uint from val is not part of
 * this listing -- presumably a right shift by `shift` followed by a clip to
 * 16 bits; confirm against the full file.
 */
120 static const int big_endian = HAVE_BIGENDIAN;
121 static const int shift = 3;
122 static const float float_mult = 1.0f / 65535.0f;
126 for (i = start; i < dstW; ++
i){
/* add half of the divisor before the (not shown) down-shift */
127 val = src[
i] + (1 << (shift - 1));
/* scale into a float using the 1/65535 factor */
129 dest[
i] = float_mult * (float)val_uint;
/*
 * Scalar counterpart of yuv2plane1_float_u whose destination is declared as
 * uint32_t words instead of floats. It walks src[start] .. src[dstW - 1]
 * and adds 1 << (shift - 1) to each sample.
 *
 * NOTE(review): the lines that finish the computation and store into dest
 * are absent from this listing. Judging by the function name the float bit
 * pattern is presumably written byte-swapped -- confirm against the full
 * file.
 */
134 yuv2plane1_float_bswap_u(
const int32_t *
src, uint32_t *dest,
int dstW,
int start)
136 static const int big_endian = HAVE_BIGENDIAN;
137 static const int shift = 3;
138 static const float float_mult = 1.0f / 65535.0f;
142 for (i = start; i < dstW; ++
i){
/* add half of the divisor before the (not shown) down-shift */
143 val = src[
i] + (1 << (shift - 1));
/*
 * AltiVec version of the float plane output: converts dstW 32-bit samples
 * from src into floats in dest. The first dst_u elements and whatever is
 * left after the vector loop are delegated to yuv2plane1_float_u; the
 * elements in between are processed four per iteration.
 *
 * NOTE(review): several declarations (shift, vadd, vshift, vlargest, v, vd,
 * i and possibly a local vzero) are not part of this listing.
 */
149 static void yuv2plane1_float_altivec(
const int32_t *src,
float *dest,
int dstW)
/*
 * dst_u is the low two bits of the negated dest address, so 0..3. It is
 * used below as the count of leading elements given to the scalar helper.
 * NOTE(review): vec_ld/vec_st work on 16-byte-aligned addresses; whether
 * skipping dst_u elements actually aligns dest (as opposed to a count
 * derived from the low four address bits), and whether &src[i] is aligned
 * at the same index, should be confirmed.
 */
151 const int dst_u = -(uintptr_t)dest & 3;
/* half of 1 << shift, added before the down-shift */
153 const int add = (1 << (shift - 1));
/* upper bound 65535 */
154 const int clip = (1 << 16) - 1;
155 const float fmult = 1.0f / 65535.0f;
159 const vec_f vmul = (
vec_f) {fmult, fmult, fmult, fmult};
/* scalar head: elements [0, dst_u) */
165 yuv2plane1_float_u(src, dest, dst_u, 0);
/* vector body: four elements per pass while at least four remain */
167 for (i = dst_u; i < dstW - 3; i += 4) {
168 v = vec_ld(0, (
const uint32_t *) &src[i]);
/* vadd/vshift/vlargest are presumably splats of add, shift and clip */
169 v = vec_add(v, vadd);
170 v = vec_sr(v, vshift);
171 v = vec_min(v, vlargest);
/* NOTE(review): the step producing vd from v is not in this listing */
/* multiply by vmul and add vzero */
174 vd = vec_madd(vd, vmul, vzero);
176 vec_st(vd, 0, &dest[i]);
/* scalar tail: elements [i, dstW) not covered by the vector loop */
179 yuv2plane1_float_u(src, dest, dstW, i);
/*
 * AltiVec version of the byte-swapped float plane output. Structure mirrors
 * yuv2plane1_float_altivec: the first dst_u elements and the remainder
 * after the vector loop go through yuv2plane1_float_bswap_u, the rest is
 * processed four elements per iteration and stored through dest viewed as
 * float.
 *
 * NOTE(review): the statements between the vec_madd and the vec_st (where
 * vswapsmall would be used) are not part of this listing; presumably they
 * reverse the bytes of each 32-bit lane -- confirm against the full file.
 */
182 static void yuv2plane1_float_bswap_altivec(
const int32_t *src, uint32_t *dest,
int dstW)
/*
 * Low two bits of the negated dest address (0..3), used as the number of
 * leading elements handled by the scalar helper.
 * NOTE(review): vec_ld/vec_st work on 16-byte-aligned addresses; confirm
 * that this count really brings dest (and &src[i]) to such a boundary.
 */
184 const int dst_u = -(uintptr_t)dest & 3;
/* half of 1 << shift, added before the down-shift */
186 const int add = (1 << (shift - 1));
/* upper bound 65535 */
187 const int clip = (1 << 16) - 1;
188 const float fmult = 1.0f / 65535.0f;
192 const vec_f vmul = (
vec_f) {fmult, fmult, fmult, fmult};
/* every 16-bit lane holds 8; its use is on lines missing from this listing */
195 const vec_u16 vswapsmall = vec_splat_u16(8);
/* scalar head: elements [0, dst_u) */
200 yuv2plane1_float_bswap_u(src, dest, dst_u, 0);
/* vector body: four elements per pass while at least four remain */
202 for (i = dst_u; i < dstW - 3; i += 4) {
203 v = vec_ld(0, (
const uint32_t *) &src[i]);
/* vadd/vshift/vlargest are presumably splats of add, shift and clip */
204 v = vec_add(v, vadd);
205 v = vec_sr(v, vshift);
206 v = vec_min(v, vlargest);
/* multiply by vmul and add vzero */
209 vd = vec_madd(vd, vmul, vzero);
214 vec_st(vd, 0, (
float *) &dest[i]);
/* scalar tail: elements [i, dstW) not covered by the vector loop */
217 yuv2plane1_float_bswap_u(src, dest, dstW, i);
/*
 * yuv2plane1_float(template, dest_type, BE_LE) generates a static wrapper
 * named yuv2plane1_float<BE_LE>_altivec. The wrapper receives src as
 * const int16_t * and dest as uint8_t *, casts them to const int32_t * and
 * dest_type * and forwards them with dstW to `template`. In the visible
 * call the dither and offset parameters are not passed on.
 * NOTE(review): some lines of the macro are missing from this listing, and
 * the text after the template call is a cross-reference caption, not code.
 */
220 #define yuv2plane1_float(template, dest_type, BE_LE) \ 221 static void yuv2plane1_float ## BE_LE ## _altivec(const int16_t *src, uint8_t *dest, \ 223 const uint8_t *dither, int offset) \ 225 template((const int32_t *)src, (dest_type *)dest, dstW); \ IEEE-754 single precision Y, 32bpp, big-endian.
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
#define AV_CPU_FLAG_ALTIVEC
standard
static int shift(int a, int b)
packed RGB 8:8:8, 24bpp, RGBRGB...
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
void(* hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Scale one horizontal line of input data using a filter over the input lines, to produce one (differently sized) line of output data.
Macro definitions for various function/variable attributes.
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
#define SWS_FULL_CHR_H_INT
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
enum AVPixelFormat dstFormat
Destination pixel format.
yuv2packedX_fn yuv2packedX
yuv2plane1_float(yuv2plane1_float_c_template, yuv2plane1_float(float, LE)
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
yuv2planar1_fn yuv2plane1
packed RGB 8:8:8, 24bpp, BGRBGR...
yuv2planarX_fn yuv2planeX
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static av_always_inline uint32_t av_float2int(float f)
Reinterpret a float as a 32-bit integer.
Contains misc utility macros and inline functions.
static double clip(void *opaque, double val)
Clip value val in the minval - maxval range.
#define output_pixel(pos, val, bias, signedness)
av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
static float add(float src0, float src1)
IEEE-754 single precision Y, 32bpp, little-endian.
int flags
Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
AVPixelFormat
Pixel format.
static double val(void *priv, double ch)
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)