FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dsputil_template.c
Go to the documentation of this file.
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
30 #include "bit_depth_template.c"
31 
32 /* draw the edges of width 'w' of an image of size width, height */
33 //FIXME check that this is ok for mpeg4 interlaced
34 static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides)
35 {
36  pixel *buf = (pixel*)p_buf;
37  int wrap = p_wrap / sizeof(pixel);
38  pixel *ptr, *last_line;
39  int i;
40 
41  /* left and right */
42  ptr = buf;
43  for(i=0;i<height;i++) {
44 #if BIT_DEPTH > 8
45  int j;
46  for (j = 0; j < w; j++) {
47  ptr[j-w] = ptr[0];
48  ptr[j+width] = ptr[width-1];
49  }
50 #else
51  memset(ptr - w, ptr[0], w);
52  memset(ptr + width, ptr[width-1], w);
53 #endif
54  ptr += wrap;
55  }
56 
57  /* top and bottom + corners */
58  buf -= w;
59  last_line = buf + (height - 1) * wrap;
60  if (sides & EDGE_TOP)
61  for(i = 0; i < h; i++)
62  memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
63  if (sides & EDGE_BOTTOM)
64  for (i = 0; i < h; i++)
65  memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
66 }
67 
/*
 * DCTELEM_FUNCS(dctcoef, suffix) generates three static helpers whose
 * names end in `suffix` and whose coefficient type is `dctcoef`:
 *
 *  - get_pixels<suffix>:   copy an 8x8 block of pixels into 64 consecutive
 *                          coefficients; line_size is the source row
 *                          distance in bytes.
 *  - clear_block<suffix>:  zero 64 coefficients.
 *  - clear_blocks<suffix>: zero 6*64 coefficients.
 *
 * The block pointers are declared as int16_t * in every instantiation and
 * are accessed as dctcoef inside the helpers.
 */
#define DCTELEM_FUNCS(dctcoef, suffix)                                  \
static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block,    \
                                        const uint8_t *_pixels,         \
                                        int line_size)                  \
{                                                                       \
    const pixel *src         = (const pixel *) _pixels;                 \
    dctcoef *av_restrict dst = (dctcoef *) _block;                      \
    int row, col;                                                       \
                                                                        \
    /* one source row becomes 8 consecutive coefficients */             \
    for (row = 0; row < 8; row++) {                                     \
        for (col = 0; col < 8; col++)                                   \
            dst[col] = src[col];                                        \
        src += line_size / sizeof(pixel);                               \
        dst += 8;                                                       \
    }                                                                   \
}                                                                       \
                                                                        \
static void FUNCC(clear_block ## suffix)(int16_t *block)                \
{                                                                       \
    memset(block, 0, 64 * sizeof(dctcoef));                             \
}                                                                       \
                                                                        \
/* zero six blocks of 64 coefficients each */                           \
static void FUNCC(clear_blocks ## suffix)(int16_t *blocks)              \
{                                                                       \
    memset(blocks, 0, 6 * 64 * sizeof(dctcoef));                        \
}
104 
105 DCTELEM_FUNCS(int16_t, _16)
106 #if BIT_DEPTH > 8
108 #endif
109 
110 #include "hpel_template.c"
111 
/*
 * PIXOP2(OPNAME, OP) generates a family of static inline pixel-block helpers
 * whose names start with OPNAME.
 *
 * OP(dst, value) is the store operation: op_put assigns `value`, op_avg
 * assigns rnd_avg_pixel4(dst, value) (see the two macros defined right after
 * this one).
 *
 * Naming used by the generated functions:
 *   _l2 / _l4  combine two / four source blocks into dst
 *   _x2        combine each pixel with the pixel sizeof(pixel) bytes after it
 *   _y2        combine each pixel with the pixel line_size bytes after it
 *   _xy2       combine each pixel with its right, lower and lower-right
 *              neighbours
 *   _no_rnd    variant with a smaller rounding term
 *
 * The _l4 and the 4/8-wide _xy2 helpers work on four bytes packed in a
 * uint32_t: the low two bits and the high six bits of every byte are summed
 * separately so that no carry crosses a byte boundary. Per byte the result is
 * (a + b + c + d + 2) >> 2, or (a + b + c + d + 1) >> 2 for _no_rnd. These
 * helpers are byte based and marked "FIXME HIGH BIT DEPTH".
 *
 * FUNC, FUNCC, pixel, pixel4, AV_RN4P, AV_RN32, no_rnd_avg_pixel4,
 * rnd_avg_pixel4, CALL_2X_PIXELS and the OPNAME_pixels{2,4,8}_l2 /
 * OPNAME_pixels8 functions are not defined in this part of the file; they
 * are presumably supplied by the included templates -- confirm.
 *
 * The macro body ends at the last CALL_2X_PIXELS line, which must NOT carry
 * a line continuation: otherwise the following "#define op_avg" directive is
 * spliced into the macro body.
 */
#define PIXOP2(OPNAME, OP) \
/* per row: OP the no-rounding average of src1 and src2 into dst, two groups of four pixels */\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                      int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        pixel4 a,b;\
        a= AV_RN4P(&src1[i*src_stride1  ]);\
        b= AV_RN4P(&src2[i*src_stride2  ]);\
        OP(*((pixel4*)&dst[i*dst_stride  ]), no_rnd_avg_pixel4(a, b));\
        a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\
        b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\
        OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\
    }\
}\
\
/* 16 pixels wide: the 8-wide helper applied twice, 8 pixels apart */\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                       int src_stride1, int src_stride2, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* per row: OP the rounded four-way byte average of src1..src4 into dst, 8 bytes wide */\
static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH */\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\
}\
\
static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h){\
    FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* same as _pixels8_l4 but with a rounding term of 1 instead of 2 per byte */\
static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    /* FIXME HIGH BIT DEPTH*/\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* 2 pixels wide, two rows per iteration; stores directly and does not go through OP */\
static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, ptrdiff_t line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        pixel *block = (pixel*)_block;\
        const pixel *pixels = (const pixel*)_pixels;\
        /* convert the byte stride into a stride in pixels */\
        line_size >>= sizeof(pixel)-1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* 4 bytes wide; l0/h0 and l1/h1 hold the horizontal sums of alternating rows, two rows per iteration */\
static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
{\
        /* FIXME HIGH BIT DEPTH */\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* 8 bytes wide: the 4-byte column pass is run twice (j), rewinding and stepping 4 bytes right in between */\
static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* same as _pixels8_xy2 but with a rounding term of 1 instead of 2 per byte */\
static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)\
{\
    /* FIXME HIGH BIT DEPTH */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* 16-wide variants built from the 8-wide ones through CALL_2X_PIXELS */\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\
av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\
CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))

/* Store operations handed to PIXOP2 as OP. */
#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
#define op_put(a, b) a = b
#if BIT_DEPTH == 8
/* Name alias only; nothing in the visible part of this file refers to it. */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#endif
#undef op_avg
#undef op_put
423 
424 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
425  FUNCC(put_pixels8)(dst, src, stride, 8);
426 }
427 void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
428  FUNCC(avg_pixels8)(dst, src, stride, 8);
429 }
430 void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
431  FUNCC(put_pixels16)(dst, src, stride, 16);
432 }
433 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
434  FUNCC(avg_pixels16)(dst, src, stride, 16);
435 }
436