FFmpeg
swscale_ppc_template.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv2yuvX
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/mem_internal.h"
26 
27 static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize,
28  const int16_t **src, uint8_t *dest,
29  const uint8_t *dither, int offset, int x)
30 {
31  register int i, j;
32  LOCAL_ALIGNED(16, int, val, [16]);
33  vector signed int vo1, vo2, vo3, vo4;
34  vector unsigned short vs1, vs2;
35  vector unsigned char vf;
36  vector unsigned int altivec_vectorShiftInt19 =
37  vec_add(vec_splat_u32(10), vec_splat_u32(9));
38 
39  for (i = 0; i < 16; i++)
40  val[i] = dither[(x + i + offset) & 7] << 12;
41 
42  vo1 = vec_ld(0, val);
43  vo2 = vec_ld(16, val);
44  vo3 = vec_ld(32, val);
45  vo4 = vec_ld(48, val);
46 
47  for (j = 0; j < filterSize; j++) {
48  unsigned int joffset=j<<1;
49  unsigned int xoffset=x<<1;
50  vector unsigned char av_unused perm;
51  vector signed short l1,vLumFilter;
52  LOAD_FILTER(vLumFilter,filter);
53  vLumFilter = vec_splat(vLumFilter, 0);
54  LOAD_L1(l1,src[j],perm);
55  yuv2planeX_8(vo1, vo2, l1, src[j], x, perm, vLumFilter);
56  yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
57  }
58 
59  vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
60  vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
61  vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
62  vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
63  vs1 = vec_packsu(vo1, vo2);
64  vs2 = vec_packsu(vo3, vo4);
65  vf = vec_packsu(vs1, vs2);
66  VEC_ST(vf, 0, dest);
67 }
68 
69 
/**
 * Scalar vertical filter for the pixel range [x, dstW).
 *
 * For every pixel the accumulator starts at dither[(pixel + offset) & 7] << 12,
 * gains src[tap][pixel] * filter[tap] for each tap, and is then shifted right
 * by 19 and clipped to 0..255 before being stored in dest.
 *
 * @param filter     filter coefficients, one per tap
 * @param filterSize number of taps
 * @param src        one input row pointer per tap
 * @param dest       output row
 * @param dstW       exclusive end of the pixel range to produce
 * @param dither     dither values, indexed modulo 8
 * @param offset     added to the pixel position when indexing dither
 * @param x          first pixel to produce
 */
static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
                                const int16_t **src, uint8_t *dest, int dstW,
                                const uint8_t *dither, int offset, int x)
{
    for (int px = x; px < dstW; px++) {
        int acc = dither[(px + offset) & 7] << 12;
        int tap = 0;

        while (tap < filterSize) {
            acc += src[tap][px] * filter[tap];
            tap++;
        }

        dest[px] = av_clip_uint8(acc >> 19);
    }
}
83 
84 static void FUNC(yuv2planeX)(const int16_t *filter, int filterSize,
85  const int16_t **src, uint8_t *dest, int dstW,
86  const uint8_t *dither, int offset)
87 {
88  int dst_u = -(uintptr_t)dest & 15;
89  int i;
90 
91  yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);
92 
93  for (i = dst_u; i < dstW - 15; i += 16)
94  FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither,
95  offset, i);
96 
97  yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);
98 }
99 
100 static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
101  const uint8_t *src, const int16_t *filter,
102  const int32_t *filterPos, int filterSize)
103 {
104  LOCAL_ALIGNED(16, int, tempo, [4]);
105 
106  switch (filterSize) {
107  case 4:
108  for (register int i = 0; i < dstW; i++) {
109  register int srcPos = filterPos[i];
110 
111  vector unsigned char src_vF = unaligned_load(srcPos, src);
112  vector signed short src_v, filter_v;
113  vector signed int val_vEven, val_s;
114  src_v = // vec_unpackh sign-extends...
115  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
116  // now put our elements in the even slots
117  src_v = vec_mergeh(src_v, (vector signed short)vzero);
118  GET_VF4(i, filter_v, filter);
119  val_vEven = vec_mule(src_v, filter_v);
120  val_s = vec_sums(val_vEven, vzero);
121  vec_st(val_s, 0, tempo);
122  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
123  }
124  break;
125  case 8:
126  for (register int i = 0; i < dstW; i++) {
127  register int srcPos = filterPos[i];
128  vector unsigned char src_vF, av_unused src_v0, av_unused src_v1;
129  vector unsigned char av_unused permS;
130  vector signed short src_v, filter_v;
131  vector signed int val_v, val_s;
132  FIRST_LOAD(src_v0, srcPos, src, permS);
133  LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
134  src_v = // vec_unpackh sign-extends...
135  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
136  filter_v = vec_ld(i << 4, filter);
137  val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
138  val_s = vec_sums(val_v, vzero);
139  vec_st(val_s, 0, tempo);
140  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
141  }
142  break;
143 
144  case 16:
145  for (register int i = 0; i < dstW; i++) {
146  register int srcPos = filterPos[i];
147 
148  vector unsigned char src_vF = unaligned_load(srcPos, src);
149  vector signed short src_vA = // vec_unpackh sign-extends...
150  (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
151  vector signed short src_vB = // vec_unpackh sign-extends...
152  (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
153  vector signed short filter_v0 = vec_ld(i << 5, filter);
154  vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
155 
156  vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
157  vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
158 
159  vector signed int val_s = vec_sums(val_v, vzero);
160 
161  VEC_ST(val_s, 0, tempo);
162  dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
163  }
164  break;
165 
166  default:
167  for (register int i = 0; i < dstW; i++) {
168  register int j;
169  register int srcPos = filterPos[i];
170  register int val = 0;
171  for (j = 0; j < filterSize; j++)
172  val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
173  dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
174  }
175  break;
176  }
177 }
mem_internal.h
av_unused
#define av_unused
Definition: attributes.h:131
yuv2planeX
static void FUNC() yuv2planeX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: swscale_ppc_template.c:84
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
perm
perm
Definition: f_perms.c:75
val
static double val(void *priv, double ch)
Definition: aeval.c:78
LOCAL_ALIGNED
#define LOCAL_ALIGNED(a, t, v,...)
Definition: mem_internal.h:133
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
yuv2planeX_u
static void yuv2planeX_u(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset, int x)
Definition: swscale_ppc_template.c:70
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
attributes.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:104
hScale_real
static void FUNC() hScale_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale_ppc_template.c:100
av_clip_uint8
#define av_clip_uint8
Definition: common.h:105
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
SwsContext
Definition: swscale_internal.h:301
short
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are short
Definition: writing_filters.txt:89
yuv2planeX_8_16
static void FUNC() yuv2planeX_8_16(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, const uint8_t *dither, int offset, int x)
Definition: swscale_ppc_template.c:27
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:61