FFmpeg
hpeldsp.c
Go to the documentation of this file.
1 /*
2  * Half-pel DSP functions.
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * Half-pel DSP functions.
28  */
29 
30 #include "libavutil/attributes.h"
31 #include "libavutil/intreadwrite.h"
32 #include "hpeldsp.h"
33 
34 #define BIT_DEPTH 8
35 #include "hpel_template.c"
36 #include "pel_template.c"
37 
/**
 * Generate the 8-bit half-pel interpolation kernels for one store mode.
 *
 * @param OPNAME  prefix pasted onto every generated function name
 *                (instantiated in this file with "avg" and "put")
 * @param OP      two-argument store macro invoked as OP(dst_lvalue, value);
 *                it decides how a computed 32-bit packed result is combined
 *                with the destination
 *
 * Generated functions (all static inline):
 *  - OPNAME_no_rnd_pixels8_l2_8: per row, stores no_rnd_avg32() of two
 *    8-byte source rows, four packed bytes at a time.
 *  - OPNAME_{no_rnd_,}pixelsN_x2_8_c: l2 of pixels and pixels + 1.
 *  - OPNAME_{no_rnd_,}pixelsN_y2_8_c: l2 of pixels and pixels + line_size.
 *  - OPNAME_{no_rnd_,}pixelsN_xy2_8_c: combines each 2x2 neighbourhood
 *    (pixels, pixels + 1 on two consecutive rows).
 *  - 16-wide variants built through CALL_2X_PIXELS from the 8-wide ones
 *    (CALL_2X_PIXELS is defined outside this file; presumably it runs the
 *    8-wide function on both halves -- confirm against its definition).
 *
 * Packed xy2 arithmetic: every byte v of the four inputs is split into its
 * low two bits (v & 0x03) and its high six bits ((v & 0xFC) >> 2), so four
 * bytes can be summed inside one uint32_t without carries crossing byte
 * lanes.  The high parts add up to at most 4 * 63 = 252; the low parts plus
 * the bias add up to at most 4 * 3 + 2 = 14, which is shifted right by two
 * and masked with 0x0F to discard bits that came from the neighbouring
 * lane.  Per byte this yields (p0 + p1 + p2 + p3 + bias) >> 2, with
 * bias = 2 (0x02020202) in the rounding kernels and bias = 1 (0x01010101)
 * in the no_rnd kernels.  The bias is folded into l0 only, and every output
 * row uses l0 + l1, so it is applied exactly once per row.
 *
 * All xy2 loops emit two rows per iteration and read h + 1 source rows;
 * an odd h would therefore write one row more than requested, and the
 * pointer rewind in the 8-wide kernels assumes exactly h rows were written.
 *
 * Destination stores go through a uint32_t cast of the byte pointer.
 * NOTE(review): this presumes the destination is suitably aligned for
 * 32-bit access -- confirm against the callers.
 */
#define PIXOP2(OPNAME, OP) \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst, \
                                                  const uint8_t *src1, \
                                                  const uint8_t *src2, \
                                                  ptrdiff_t dst_stride, \
                                                  ptrdiff_t src_stride1, \
                                                  ptrdiff_t src_stride2, \
                                                  int h) \
{ \
    /* Strides are ptrdiff_t so the ptrdiff_t line_size passed by the */ \
    /* wrappers below is not implicitly narrowed. */ \
    int i; \
 \
    for (i = 0; i < h; i++) { \
        uint32_t a, b; \
        a = AV_RN32(&src1[i * src_stride1]); \
        b = AV_RN32(&src2[i * src_stride2]); \
        OP(*((uint32_t *) &dst[i * dst_stride]), \
           no_rnd_avg32(a, b)); \
        a = AV_RN32(&src1[i * src_stride1 + 4]); \
        b = AV_RN32(&src2[i * src_stride2 + 4]); \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]), \
           no_rnd_avg32(a, b)); \
    } \
} \
 \
static inline void OPNAME ## _no_rnd_pixels8_x2_8_c(uint8_t *block, \
                                                    const uint8_t *pixels, \
                                                    ptrdiff_t line_size, \
                                                    int h) \
{ \
    OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + 1, \
                                   line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels8_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels8_l2_8(block, pixels, pixels + 1, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _no_rnd_pixels8_y2_8_c(uint8_t *block, \
                                                    const uint8_t *pixels, \
                                                    ptrdiff_t line_size, \
                                                    int h) \
{ \
    OPNAME ## _no_rnd_pixels8_l2_8(block, pixels, pixels + line_size, \
                                   line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels8_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels8_l2_8(block, pixels, pixels + line_size, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels4_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels4_l2_8(block, pixels, pixels + 1, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels4_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels4_l2_8(block, pixels, pixels + line_size, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels2_x2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels2_l2_8(block, pixels, pixels + 1, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels2_y2_8_c(uint8_t *block, \
                                             const uint8_t *pixels, \
                                             ptrdiff_t line_size, \
                                             int h) \
{ \
    OPNAME ## _pixels2_l2_8(block, pixels, pixels + line_size, \
                            line_size, line_size, line_size, h); \
} \
 \
static inline void OPNAME ## _pixels2_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* Scalar 2-wide kernel: a* holds the horizontal sum of columns 0+1, */ \
    /* b* the sum of columns 1+2; the "0" pair carries the +2 rounding. */ \
    int i, a1, b1; \
    int a0 = pixels[0]; \
    int b0 = pixels[1] + 2; \
 \
    a0 += b0; \
    b0 += pixels[2]; \
    pixels += line_size; \
    for (i = 0; i < h; i += 2) { \
        a1 = pixels[0]; \
        b1 = pixels[1]; \
        a1 += b1; \
        b1 += pixels[2]; \
 \
        /* FIXME non put: the result is assigned directly instead of */ \
        /* going through OP, so every OPNAME instantiation overwrites */ \
        /* the destination here. */ \
        block[0] = (a1 + a0) >> 2; \
        block[1] = (b1 + b0) >> 2; \
 \
        pixels += line_size; \
        block  += line_size; \
 \
        a0  = pixels[0]; \
        b0  = pixels[1] + 2; \
        a0 += b0; \
        b0 += pixels[2]; \
 \
        block[0] = (a1 + a0) >> 2; \
        block[1] = (b1 + b0) >> 2; \
        pixels  += line_size; \
        block   += line_size; \
    } \
} \
 \
static inline void OPNAME ## _pixels4_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int i; \
    uint32_t a  = AV_RN32(pixels); \
    uint32_t b  = AV_RN32(pixels + 1); \
    /* l0/h0: low-2-bit and high-6-bit sums of the row above (with bias) */ \
    uint32_t l0 = (a & 0x03030303UL) + \
                  (b & 0x03030303UL) + \
                  0x02020202UL; \
    uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
                  ((b & 0xFCFCFCFCUL) >> 2); \
    uint32_t l1, h1; \
 \
    pixels += line_size; \
    for (i = 0; i < h; i += 2) { \
        a  = AV_RN32(pixels); \
        b  = AV_RN32(pixels + 1); \
        l1 = (a & 0x03030303UL) + \
             (b & 0x03030303UL); \
        h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) block), h0 + h1 + \
           (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
        pixels += line_size; \
        block  += line_size; \
        a  = AV_RN32(pixels); \
        b  = AV_RN32(pixels + 1); \
        l0 = (a & 0x03030303UL) + \
             (b & 0x03030303UL) + \
             0x02020202UL; \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
             ((b & 0xFCFCFCFCUL) >> 2); \
        OP(*((uint32_t *) block), h0 + h1 + \
           (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
        pixels += line_size; \
        block  += line_size; \
    } \
} \
 \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int j; \
 \
    /* Two passes, each covering one 4-byte-wide column of the block. */ \
    for (j = 0; j < 2; j++) { \
        int i; \
        uint32_t a  = AV_RN32(pixels); \
        uint32_t b  = AV_RN32(pixels + 1); \
        uint32_t l0 = (a & 0x03030303UL) + \
                      (b & 0x03030303UL) + \
                      0x02020202UL; \
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
                      ((b & 0xFCFCFCFCUL) >> 2); \
        uint32_t l1, h1; \
 \
        pixels += line_size; \
        for (i = 0; i < h; i += 2) { \
            a  = AV_RN32(pixels); \
            b  = AV_RN32(pixels + 1); \
            l1 = (a & 0x03030303UL) + \
                 (b & 0x03030303UL); \
            h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
                 ((b & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), h0 + h1 + \
               (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
            a  = AV_RN32(pixels); \
            b  = AV_RN32(pixels + 1); \
            l0 = (a & 0x03030303UL) + \
                 (b & 0x03030303UL) + \
                 0x02020202UL; \
            h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
                 ((b & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), h0 + h1 + \
               (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
        } \
        /* Rewind to the top row and step 4 bytes right for pass two: */ \
        /* the source advanced h + 1 rows, the destination h rows. */ \
        pixels += 4 - line_size * (h + 1); \
        block  += 4 - line_size * h; \
    } \
} \
 \
static inline void OPNAME ## _no_rnd_pixels8_xy2_8_c(uint8_t *block, \
                                                     const uint8_t *pixels, \
                                                     ptrdiff_t line_size, \
                                                     int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int j; \
 \
    /* Same layout as the rounding 8-wide kernel, but with a bias of 1 */ \
    /* per byte (0x01010101) instead of 2. */ \
    for (j = 0; j < 2; j++) { \
        int i; \
        uint32_t a  = AV_RN32(pixels); \
        uint32_t b  = AV_RN32(pixels + 1); \
        uint32_t l0 = (a & 0x03030303UL) + \
                      (b & 0x03030303UL) + \
                      0x01010101UL; \
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
                      ((b & 0xFCFCFCFCUL) >> 2); \
        uint32_t l1, h1; \
 \
        pixels += line_size; \
        for (i = 0; i < h; i += 2) { \
            a  = AV_RN32(pixels); \
            b  = AV_RN32(pixels + 1); \
            l1 = (a & 0x03030303UL) + \
                 (b & 0x03030303UL); \
            h1 = ((a & 0xFCFCFCFCUL) >> 2) + \
                 ((b & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), h0 + h1 + \
               (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
            a  = AV_RN32(pixels); \
            b  = AV_RN32(pixels + 1); \
            l0 = (a & 0x03030303UL) + \
                 (b & 0x03030303UL) + \
                 0x01010101UL; \
            h0 = ((a & 0xFCFCFCFCUL) >> 2) + \
                 ((b & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), h0 + h1 + \
               (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
        } \
        pixels += 4 - line_size * (h + 1); \
        block  += 4 - line_size * h; \
    } \
} \
 \
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_8_c, \
               OPNAME ## _pixels8_x2_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_8_c, \
               OPNAME ## _pixels8_y2_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \
               OPNAME ## _pixels8_xy2_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_8_c, \
               OPNAME ## _pixels8_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_8_c, \
               OPNAME ## _no_rnd_pixels8_x2_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_8_c, \
               OPNAME ## _no_rnd_pixels8_y2_8_c, \
               8) \
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_8_c, \
               OPNAME ## _no_rnd_pixels8_xy2_8_c, \
               8)

330 #define op_avg(a, b) a = rnd_avg32(a, b)
331 #define op_put(a, b) a = b
332 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
333 PIXOP2(avg, op_avg)
334 PIXOP2(put, op_put)
335 #undef op_avg
336 #undef op_put
337 
339 {
340 #define hpel_funcs(prefix, idx, num) \
341  c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
342  c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
343  c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
344  c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
345 
346  hpel_funcs(put, [0], 16);
347  hpel_funcs(put, [1], 8);
348  hpel_funcs(put, [2], 4);
349  hpel_funcs(put, [3], 2);
350  hpel_funcs(put_no_rnd, [0], 16);
351  hpel_funcs(put_no_rnd, [1], 8);
352  hpel_funcs(avg, [0], 16);
353  hpel_funcs(avg, [1], 8);
354  hpel_funcs(avg, [2], 4);
355  hpel_funcs(avg, [3], 2);
356  hpel_funcs(avg_no_rnd,, 16);
357 
358 #if ARCH_AARCH64
360 #elif ARCH_ARM
362 #elif ARCH_PPC
364 #elif ARCH_X86
366 #elif ARCH_MIPS
368 #elif ARCH_LOONGARCH64
370 #endif
371 }
pel_template.c
ff_hpeldsp_init_loongarch
void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags)
Definition: hpeldsp_init_loongarch.c:26
op_put
#define op_put(a, b)
Definition: hpeldsp.c:331
op_avg
#define op_avg(a, b)
Definition: hpeldsp.c:330
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
ff_hpeldsp_init
av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
Definition: hpeldsp.c:338
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ... [excerpt truncated]
Definition: undefined.txt:32
HpelDSPContext
Half-pel DSP context.
Definition: hpeldsp.h:45
ff_hpeldsp_init_x86
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
Definition: hpeldsp_init.c:229
ff_hpeldsp_init_aarch64
av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
Definition: hpeldsp_init_aarch64.c:86
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
attributes.h
ff_hpeldsp_init_ppc
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
Definition: hpeldsp_altivec.c:368
hpel_funcs
#define hpel_funcs(prefix, idx, num)
ff_hpeldsp_init_mips
void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
Definition: hpeldsp_init_mips.c:26
ff_hpeldsp_init_arm
av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
Definition: hpeldsp_init_arm.c:45
hpel_template.c
PIXOP2
#define PIXOP2(OPNAME, OP)
Definition: hpeldsp.c:38
hpeldsp.h
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482