hpeldsp.c
/*
 * aligned/packed access motion
 *
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/bit_depth_template.c" // for BYTE_VEC32
#include "libavcodec/hpeldsp.h"
#include "libavcodec/rnd_avg.h"
#include "dsputil_sh4.h"

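/* 32-bit word load/store helpers.  The SH-4 cannot perform unaligned
 * 32-bit accesses, so these are only ever applied to 4-byte-aligned
 * addresses; misaligned sources are handled by loading two aligned
 * words and recombining them with MERGE1/MERGE2 below. */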
#define LP(p) *(uint32_t*)(p)
#define LPC(p) *(const uint32_t*)(p)

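/* UNPACK splits two words into per-byte high parts (ph: the top six bits
 * of every byte, already shifted down by 2) and low parts (pl: the sum of
 * the two low bits of every byte), so that four pixels can be summed
 * without carries between bytes.  rnd_PACK / no_rnd_PACK then fold two
 * such pairs back together; per byte they compute (a+b+c+d+2)>>2 and
 * (a+b+c+d+1)>>2 respectively. */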
#define UNPACK(ph,pl,tt0,tt1) do { \
    uint32_t t0,t1; t0=tt0;t1=tt1; \
    ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
    pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)

#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))

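/* Reconstruct an unaligned word from two aligned neighbours a (lower
 * address) and b: MERGE1 yields the word starting at byte offset ofs,
 * MERGE2 the word starting at ofs+1.  The little-endian forms are the
 * active ones; the big-endian variants are kept only as a comment. */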
/* little-endian */
#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
/* big
#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
*/

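/* OP selects how a computed word reaches the destination: put simply
 * stores it, avg combines it with the existing contents through
 * rnd_avg32() (byte-wise average, rounding up on ties). */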
#define put(d,s) d = s
#define avg(d,s) d = rnd_avg32(s,d)

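/* Whole-pel copy/average of 4-pixel-wide blocks.  OP_C40 is the fast
 * path for a word-aligned source; OP_C4(ofs) realigns a source that is
 * off by ofs bytes using two aligned loads per row.  The functions below
 * dispatch on the source alignment (ref & 3). */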
#define OP_C4(ofs) \
    ref-=ofs; \
    do { \
        OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \
        ref+=stride; \
        dest+=stride; \
    } while(--height)

#define OP_C40() \
    do { \
        OP(LP(dest),LPC(ref)); \
        ref+=stride; \
        dest+=stride; \
    } while(--height)

#define OP put

static void put_pixels4_c(uint8_t *dest, const uint8_t *ref,
                          const int stride, int height)
{
    switch((int)ref&3){
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}

#undef OP
#define OP avg

static void avg_pixels4_c(uint8_t *dest, const uint8_t *ref,
                          const int stride, int height)
{
    switch((int)ref&3){
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}

#undef OP

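/* Whole-pel ("o") copy for 8- and 16-pixel-wide rows.  OP_C0 is the
 * aligned path; OP_C(ofs) realigns a misaligned source word by word.
 * The avg2 argument is unused here (no interpolation takes place); it is
 * only accepted so DEFFUNC can instantiate every variant the same way. */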
#define OP_C(ofs,sz,avg2) \
{ \
    ref-=ofs; \
    do { \
        uint32_t t0,t1; \
        t0 = LPC(ref+0); \
        t1 = LPC(ref+4); \
        OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
        t0 = LPC(ref+8); \
        OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
        if (sz==16) { \
            t1 = LPC(ref+12); \
            OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
            t0 = LPC(ref+16); \
            OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}

/* aligned */
#define OP_C0(sz,avg2) \
{ \
    do { \
        OP(LP(dest+0), LPC(ref+0)); \
        OP(LP(dest+4), LPC(ref+4)); \
        if (sz==16) { \
            OP(LP(dest+8), LPC(ref+8)); \
            OP(LP(dest+12), LPC(ref+12)); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}

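/* Horizontal ("x") half-pel interpolation: every destination word is the
 * byte-wise average of the source word at byte offset ofs and the word
 * one byte further on (MERGE1/MERGE2), with avg2 selecting the rounding
 * (rnd_avg32) or non-rounding (no_rnd_avg32) average. */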
#define OP_X(ofs,sz,avg2) \
{ \
    ref-=ofs; \
    do { \
        uint32_t t0,t1; \
        t0 = LPC(ref+0); \
        t1 = LPC(ref+4); \
        OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
        t0 = LPC(ref+8); \
        OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
        if (sz==16) { \
            t1 = LPC(ref+12); \
            OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
            t0 = LPC(ref+16); \
            OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
        } \
        ref+=stride; \
        dest+= stride; \
    } while(--height); \
}

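/* Vertical ("y") half-pel interpolation: the words of the previous row
 * are kept in t0..t3 and averaged with the current row, so each source
 * row is loaded only once.  OP_Y0 is the aligned variant; OP_Y realigns
 * every row with MERGE1 first. */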
/* aligned */
#define OP_Y0(sz,avg2) \
{ \
    uint32_t t0,t1,t2,t3,t; \
\
    t0 = LPC(ref+0); \
    t1 = LPC(ref+4); \
    if (sz==16) { \
        t2 = LPC(ref+8); \
        t3 = LPC(ref+12); \
    } \
    do { \
        ref += stride; \
\
        t = LPC(ref+0); \
        OP(LP(dest+0), avg2(t0,t)); t0 = t; \
        t = LPC(ref+4); \
        OP(LP(dest+4), avg2(t1,t)); t1 = t; \
        if (sz==16) { \
            t = LPC(ref+8); \
            OP(LP(dest+8), avg2(t2,t)); t2 = t; \
            t = LPC(ref+12); \
            OP(LP(dest+12), avg2(t3,t)); t3 = t; \
        } \
        dest+= stride; \
    } while(--height); \
}

#define OP_Y(ofs,sz,avg2) \
{ \
    uint32_t t0,t1,t2,t3,t,w0,w1; \
\
    ref-=ofs; \
    w0 = LPC(ref+0); \
    w1 = LPC(ref+4); \
    t0 = MERGE1(w0,w1,ofs); \
    w0 = LPC(ref+8); \
    t1 = MERGE1(w1,w0,ofs); \
    if (sz==16) { \
        w1 = LPC(ref+12); \
        t2 = MERGE1(w0,w1,ofs); \
        w0 = LPC(ref+16); \
        t3 = MERGE1(w1,w0,ofs); \
    } \
    do { \
        ref += stride; \
\
        w0 = LPC(ref+0); \
        w1 = LPC(ref+4); \
        t = MERGE1(w0,w1,ofs); \
        OP(LP(dest+0), avg2(t0,t)); t0 = t; \
        w0 = LPC(ref+8); \
        t = MERGE1(w1,w0,ofs); \
        OP(LP(dest+4), avg2(t1,t)); t1 = t; \
        if (sz==16) { \
            w1 = LPC(ref+12); \
            t = MERGE1(w0,w1,ofs); \
            OP(LP(dest+8), avg2(t2,t)); t2 = t; \
            w0 = LPC(ref+16); \
            t = MERGE1(w1,w0,ofs); \
            OP(LP(dest+12), avg2(t3,t)); t3 = t; \
        } \
        dest+=stride; \
    } while(--height); \
}

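/* Diagonal ("xy") half-pel interpolation, a 2x2 average: each row is
 * split with UNPACK into carry-free high/low parts, and PACK (rnd_PACK
 * or no_rnd_PACK) combines the previous row's parts with the current
 * row's into the final four-pixel average. */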
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)
#define OP_XY(ofs,sz,PACK) \
{ \
    uint32_t t2,t3,w0,w1; \
    uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \
\
    ref -= ofs; \
    w0 = LPC(ref+0); \
    w1 = LPC(ref+4); \
    UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
    w0 = LPC(ref+8); \
    UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
    if (sz==16) { \
        w1 = LPC(ref+12); \
        UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LPC(ref+16); \
        UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
    } \
    do { \
        ref+=stride; \
        w0 = LPC(ref+0); \
        w1 = LPC(ref+4); \
        UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
        a0 = t2; a1 = t3; \
        w0 = LPC(ref+8); \
        UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
        a2 = t2; a3 = t3; \
        if (sz==16) { \
            w1 = LPC(ref+12); \
            UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
            OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
            a4 = t2; a5 = t3; \
            w0 = LPC(ref+16); \
            UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
            OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
            a6 = t2; a7 = t3; \
        } \
        dest+=stride; \
    } while(--height); \
}

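/* DEFFUNC instantiates one half-pel primitive.  The generated function
 * dispatches on the source alignment (ref & 3): case 0 expands the
 * aligned macro OP_N##0, the other cases the realigning OP_N(ofs).
 * The rnd argument both names the function and, pasted with avgfunc,
 * selects rnd_avg32/no_rnd_avg32 (or rnd_PACK/no_rnd_PACK for xy).
 * The non-static ff_put/ff_avg "o" variants are presumably declared in
 * dsputil_sh4.h so the other SH-4 DSP code can reuse them. */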
#define DEFFUNC(prefix, op, rnd, xy, sz, OP_N, avgfunc) \
prefix void op##_##rnd##_pixels##sz##_##xy(uint8_t *dest, const uint8_t *ref, \
                                           const ptrdiff_t stride, int height) \
{ \
    switch((int)ref&3) { \
    case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
    case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
    case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
    case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
    } \
}

#define OP put

DEFFUNC( ,ff_put,rnd,o,8,OP_C,avg32)
DEFFUNC(static,put, rnd,x,8,OP_X,avg32)
DEFFUNC(static,put,no_rnd,x,8,OP_X,avg32)
DEFFUNC(static,put, rnd,y,8,OP_Y,avg32)
DEFFUNC(static,put,no_rnd,y,8,OP_Y,avg32)
DEFFUNC(static,put, rnd,xy,8,OP_XY,PACK)
DEFFUNC(static,put,no_rnd,xy,8,OP_XY,PACK)
DEFFUNC( ,ff_put,rnd,o,16,OP_C,avg32)
DEFFUNC(static,put, rnd,x,16,OP_X,avg32)
DEFFUNC(static,put,no_rnd,x,16,OP_X,avg32)
DEFFUNC(static,put, rnd,y,16,OP_Y,avg32)
DEFFUNC(static,put,no_rnd,y,16,OP_Y,avg32)
DEFFUNC(static,put, rnd,xy,16,OP_XY,PACK)
DEFFUNC(static,put,no_rnd,xy,16,OP_XY,PACK)

#undef OP
#define OP avg

DEFFUNC( ,ff_avg,rnd,o,8,OP_C,avg32)
DEFFUNC(static,avg, rnd,x,8,OP_X,avg32)
DEFFUNC(static,avg, rnd,y,8,OP_Y,avg32)
DEFFUNC(static,avg, rnd,xy,8,OP_XY,PACK)
DEFFUNC( ,ff_avg,rnd,o,16,OP_C,avg32)
DEFFUNC(static,avg, rnd,x,16,OP_X,avg32)
DEFFUNC(static,avg,no_rnd,x,16,OP_X,avg32)
DEFFUNC(static,avg, rnd,y,16,OP_Y,avg32)
DEFFUNC(static,avg,no_rnd,y,16,OP_Y,avg32)
DEFFUNC(static,avg, rnd,xy,16,OP_XY,PACK)
DEFFUNC(static,avg,no_rnd,xy,16,OP_XY,PACK)

#undef OP

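/* For whole-pel ("o") copies no interpolation takes place, so the
 * rounding and non-rounding entry points can be the same function. */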
#define ff_put_no_rnd_pixels8_o ff_put_rnd_pixels8_o
#define ff_put_no_rnd_pixels16_o ff_put_rnd_pixels16_o
#define ff_avg_no_rnd_pixels16_o ff_avg_rnd_pixels16_o
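
/* Wire the SH-4 implementations into the HpelDSPContext tables:
 * the first index selects the block width (0 = 16 pixels, 1 = 8 pixels),
 * the second the sub-pel position (0 = whole-pel, 1 = x, 2 = y, 3 = xy). */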
317 
319 {
321  c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
322  c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
323  c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
325  c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
326  c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
327  c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;
328 
330  c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
331  c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
332  c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
334  c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
335  c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
336  c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;
337 
339  c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
340  c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
341  c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
343  c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
344  c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
345  c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;
346 
348  c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x;
349  c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y;
350  c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy;
351 }