FFmpeg
pixblockdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Brian Foley
3  * Copyright (c) 2002 Dieter Shirley
4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
#include "config.h"

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"
32 
33 #if HAVE_ALTIVEC
34 
35 #if HAVE_VSX
36 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
37  ptrdiff_t stride)
38 {
39  int i;
40  vector unsigned char perm =
41  (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
42  0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
43  const vector unsigned char zero =
44  (const vector unsigned char) vec_splat_u8(0);
45 
46  for (i = 0; i < 8; i++) {
47  /* Read potentially unaligned pixels.
48  * We're reading 16 pixels, and actually only want 8,
49  * but we simply ignore the extras. */
50  vector unsigned char bytes = vec_vsx_ld(0, pixels);
51 
52  // Convert the bytes into shorts.
53  //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
54  vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
55 
56  // Save the data to the block, we assume the block is 16-byte aligned.
57  vec_vsx_st(shorts, i * 16, (vector signed short *) block);
58 
59  pixels += stride;
60  }
61 }
62 #else
63 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
64  ptrdiff_t stride)
65 {
66  int i;
67  const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
68 
69  for (i = 0; i < 8; i++) {
70  vec_u8 perm = vec_lvsl(0, pixels);
71  /* Read potentially unaligned pixels.
72  * We're reading 16 pixels, and actually only want 8,
73  * but we simply ignore the extras. */
74  vec_u8 pixl = vec_ld(0, pixels);
75  vec_u8 pixr = vec_ld(7, pixels);
76  vec_u8 bytes = vec_perm(pixl, pixr, perm);
77 
78  // Convert the bytes into shorts.
79  vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
80 
81  // Save the data to the block, we assume the block is 16-byte aligned.
82  vec_st(shorts, i * 16, (vec_s16 *)block);
83 
84  pixels += stride;
85  }
86 }
87 
88 #endif /* HAVE_VSX */
89 
90 #if HAVE_VSX
91 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
92  const uint8_t *s2, ptrdiff_t stride)
93 {
94  int i;
95  const vector unsigned char zero =
96  (const vector unsigned char) vec_splat_u8(0);
97  vector signed short shorts1, shorts2;
98 
99  for (i = 0; i < 4; i++) {
100  /* Read potentially unaligned pixels.
101  * We're reading 16 pixels, and actually only want 8,
102  * but we simply ignore the extras. */
103  vector unsigned char bytes = vec_vsx_ld(0, s1);
104 
105  // Convert the bytes into shorts.
106  shorts1 = (vector signed short) vec_mergeh(bytes, zero);
107 
108  // Do the same for the second block of pixels.
109  bytes =vec_vsx_ld(0, s2);
110 
111  // Convert the bytes into shorts.
112  shorts2 = (vector signed short) vec_mergeh(bytes, zero);
113 
114  // Do the subtraction.
115  shorts1 = vec_sub(shorts1, shorts2);
116 
117  // Save the data to the block, we assume the block is 16-byte aligned.
118  vec_vsx_st(shorts1, 0, (vector signed short *) block);
119 
120  s1 += stride;
121  s2 += stride;
122  block += 8;
123 
124  /* The code below is a copy of the code above...
125  * This is a manual unroll. */
126 
127  /* Read potentially unaligned pixels.
128  * We're reading 16 pixels, and actually only want 8,
129  * but we simply ignore the extras. */
130  bytes = vec_vsx_ld(0, s1);
131 
132  // Convert the bytes into shorts.
133  shorts1 = (vector signed short) vec_mergeh(bytes, zero);
134 
135  // Do the same for the second block of pixels.
136  bytes = vec_vsx_ld(0, s2);
137 
138  // Convert the bytes into shorts.
139  shorts2 = (vector signed short) vec_mergeh(bytes, zero);
140 
141  // Do the subtraction.
142  shorts1 = vec_sub(shorts1, shorts2);
143 
144  // Save the data to the block, we assume the block is 16-byte aligned.
145  vec_vsx_st(shorts1, 0, (vector signed short *) block);
146 
147  s1 += stride;
148  s2 += stride;
149  block += 8;
150  }
151 }
152 #else
153 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
154  const uint8_t *s2, ptrdiff_t stride)
155 {
156  int i;
157  vec_u8 perm;
158  const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
159  vec_s16 shorts1, shorts2;
160 
161  for (i = 0; i < 4; i++) {
162  /* Read potentially unaligned pixels.
163  * We're reading 16 pixels, and actually only want 8,
164  * but we simply ignore the extras. */
165  perm = vec_lvsl(0, s1);
166  vec_u8 pixl = vec_ld(0, s1);
167  vec_u8 pixr = vec_ld(15, s1);
168  vec_u8 bytes = vec_perm(pixl, pixr, perm);
169 
170  // Convert the bytes into shorts.
171  shorts1 = (vec_s16)vec_mergeh(zero, bytes);
172 
173  // Do the same for the second block of pixels.
174  perm = vec_lvsl(0, s2);
175  pixl = vec_ld(0, s2);
176  pixr = vec_ld(15, s2);
177  bytes = vec_perm(pixl, pixr, perm);
178 
179  // Convert the bytes into shorts.
180  shorts2 = (vec_s16)vec_mergeh(zero, bytes);
181 
182  // Do the subtraction.
183  shorts1 = vec_sub(shorts1, shorts2);
184 
185  // Save the data to the block, we assume the block is 16-byte aligned.
186  vec_st(shorts1, 0, (vec_s16 *)block);
187 
188  s1 += stride;
189  s2 += stride;
190  block += 8;
191 
192  /* The code below is a copy of the code above...
193  * This is a manual unroll. */
194 
195  /* Read potentially unaligned pixels.
196  * We're reading 16 pixels, and actually only want 8,
197  * but we simply ignore the extras. */
198  perm = vec_lvsl(0, s1);
199  pixl = vec_ld(0, s1);
200  pixr = vec_ld(15, s1);
201  bytes = vec_perm(pixl, pixr, perm);
202 
203  // Convert the bytes into shorts.
204  shorts1 = (vec_s16)vec_mergeh(zero, bytes);
205 
206  // Do the same for the second block of pixels.
207  perm = vec_lvsl(0, s2);
208  pixl = vec_ld(0, s2);
209  pixr = vec_ld(15, s2);
210  bytes = vec_perm(pixl, pixr, perm);
211 
212  // Convert the bytes into shorts.
213  shorts2 = (vec_s16)vec_mergeh(zero, bytes);
214 
215  // Do the subtraction.
216  shorts1 = vec_sub(shorts1, shorts2);
217 
218  // Save the data to the block, we assume the block is 16-byte aligned.
219  vec_st(shorts1, 0, (vec_s16 *)block);
220 
221  s1 += stride;
222  s2 += stride;
223  block += 8;
224  }
225 }
226 
227 #endif /* HAVE_VSX */
228 
229 #endif /* HAVE_ALTIVEC */
230 
231 #if HAVE_VSX
232 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
233  ptrdiff_t stride)
234 {
235  int i;
236  for (i = 0; i < 8; i++) {
237  vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
238 
239  vec_vsx_st(shorts, i * 16, block);
240 
241  pixels += stride;
242  }
243 }
244 
245 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
246  const uint8_t *s2, ptrdiff_t stride)
247 {
248  int i;
249  vec_s16 shorts1, shorts2;
250  for (i = 0; i < 8; i++) {
251  shorts1 = vsx_ld_u8_s16(0, s1);
252  shorts2 = vsx_ld_u8_s16(0, s2);
253 
254  shorts1 = vec_sub(shorts1, shorts2);
255 
256  vec_vsx_st(shorts1, 0, block);
257 
258  s1 += stride;
259  s2 += stride;
260  block += 8;
261  }
262 }
263 #endif /* HAVE_VSX */
264 
266  AVCodecContext *avctx,
267  unsigned high_bit_depth)
268 {
269 #if HAVE_ALTIVEC
271  return;
272 
273  c->diff_pixels = diff_pixels_altivec;
274 
275  if (!high_bit_depth) {
276  c->get_pixels = get_pixels_altivec;
277  }
278 #endif /* HAVE_ALTIVEC */
279 
280 #if HAVE_VSX
281  if (!PPC_VSX(av_get_cpu_flags()))
282  return;
283 
284  c->diff_pixels = diff_pixels_vsx;
285 
286  if (!high_bit_depth)
287  c->get_pixels = get_pixels_vsx;
288 #endif /* HAVE_VSX */
289 }
stride
int stride
Definition: mace.c:144
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
ff_pixblockdsp_init_ppc
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: pixblockdsp.c:265
perm
perm
Definition: f_perms.c:74
vec_s16
#define vec_s16
Definition: util_altivec.h:37
av_cold
#define av_cold
Definition: attributes.h:90
s1
#define s1
Definition: regdef.h:38
PixblockDSPContext
Definition: pixblockdsp.h:28
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
s2
#define s2
Definition: regdef.h:39
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
cpu.h
vec_u8
#define vec_u8
Definition: util_altivec.h:34
attributes.h
i
int i
Definition: input.c:407
uint8_t
uint8_t
Definition: audio_convert.c:194
avcodec.h
AVCodecContext
main external API structure.
Definition: avcodec.h:536
zero
#define zero
Definition: regdef.h:64
util_altivec.h
cpu.h
PPC_VSX
#define PPC_VSX(flags)
Definition: cpu.h:26
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
short
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are short
Definition: writing_filters.txt:89
pixblockdsp.h