FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
simple_idct_alpha.c
Go to the documentation of this file.
1 /*
2  * Simple IDCT (Alpha optimized)
3  *
4  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * based upon some outcommented C code from mpeg2dec (idct_mmx.c
7  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
8  *
9  * Alpha optimizations by Måns Rullgård <mans@mansr.com>
10  * and Falk Hueffner <falk@debian.org>
11  *
12  * This file is part of FFmpeg.
13  *
14  * FFmpeg is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * FFmpeg is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with FFmpeg; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 #include "libavcodec/dsputil.h"
30 #include "dsputil_alpha.h"
31 #include "asm.h"
32 
// Fixed-point cosine table: Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14),
// rounded to the nearest integer.
// W4 is actually exactly 16384, but using 16383 works around
// accumulating rounding errors for some encoders
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
// Right shift applied to every output of the row pass.
#define ROW_SHIFT 11
// Right shift applied to every output of the column pass.
#define COL_SHIFT 20
45 
46 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
47 static inline int idct_row(DCTELEM *row)
48 {
49  int a0, a1, a2, a3, b0, b1, b2, b3, t;
50  uint64_t l, r, t2;
51  l = ldq(row);
52  r = ldq(row + 4);
53 
54  if (l == 0 && r == 0)
55  return 0;
56 
57  a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
58 
59  if (((l & ~0xffffUL) | r) == 0) {
60  a0 >>= ROW_SHIFT;
61  t2 = (uint16_t) a0;
62  t2 |= t2 << 16;
63  t2 |= t2 << 32;
64 
65  stq(t2, row);
66  stq(t2, row + 4);
67  return 1;
68  }
69 
70  a1 = a0;
71  a2 = a0;
72  a3 = a0;
73 
74  t = extwl(l, 4); /* row[2] */
75  if (t != 0) {
76  t = sextw(t);
77  a0 += W2 * t;
78  a1 += W6 * t;
79  a2 -= W6 * t;
80  a3 -= W2 * t;
81  }
82 
83  t = extwl(r, 0); /* row[4] */
84  if (t != 0) {
85  t = sextw(t);
86  a0 += W4 * t;
87  a1 -= W4 * t;
88  a2 -= W4 * t;
89  a3 += W4 * t;
90  }
91 
92  t = extwl(r, 4); /* row[6] */
93  if (t != 0) {
94  t = sextw(t);
95  a0 += W6 * t;
96  a1 -= W2 * t;
97  a2 += W2 * t;
98  a3 -= W6 * t;
99  }
100 
101  t = extwl(l, 2); /* row[1] */
102  if (t != 0) {
103  t = sextw(t);
104  b0 = W1 * t;
105  b1 = W3 * t;
106  b2 = W5 * t;
107  b3 = W7 * t;
108  } else {
109  b0 = 0;
110  b1 = 0;
111  b2 = 0;
112  b3 = 0;
113  }
114 
115  t = extwl(l, 6); /* row[3] */
116  if (t) {
117  t = sextw(t);
118  b0 += W3 * t;
119  b1 -= W7 * t;
120  b2 -= W1 * t;
121  b3 -= W5 * t;
122  }
123 
124 
125  t = extwl(r, 2); /* row[5] */
126  if (t) {
127  t = sextw(t);
128  b0 += W5 * t;
129  b1 -= W1 * t;
130  b2 += W7 * t;
131  b3 += W3 * t;
132  }
133 
134  t = extwl(r, 6); /* row[7] */
135  if (t) {
136  t = sextw(t);
137  b0 += W7 * t;
138  b1 -= W5 * t;
139  b2 += W3 * t;
140  b3 -= W1 * t;
141  }
142 
143  row[0] = (a0 + b0) >> ROW_SHIFT;
144  row[1] = (a1 + b1) >> ROW_SHIFT;
145  row[2] = (a2 + b2) >> ROW_SHIFT;
146  row[3] = (a3 + b3) >> ROW_SHIFT;
147  row[4] = (a3 - b3) >> ROW_SHIFT;
148  row[5] = (a2 - b2) >> ROW_SHIFT;
149  row[6] = (a1 - b1) >> ROW_SHIFT;
150  row[7] = (a0 - b0) >> ROW_SHIFT;
151 
152  return 2;
153 }
154 
155 static inline void idct_col(DCTELEM *col)
156 {
157  int a0, a1, a2, a3, b0, b1, b2, b3;
158 
159  col[0] += (1 << (COL_SHIFT - 1)) / W4;
160 
161  a0 = W4 * col[8 * 0];
162  a1 = W4 * col[8 * 0];
163  a2 = W4 * col[8 * 0];
164  a3 = W4 * col[8 * 0];
165 
166  if (col[8 * 2]) {
167  a0 += W2 * col[8 * 2];
168  a1 += W6 * col[8 * 2];
169  a2 -= W6 * col[8 * 2];
170  a3 -= W2 * col[8 * 2];
171  }
172 
173  if (col[8 * 4]) {
174  a0 += W4 * col[8 * 4];
175  a1 -= W4 * col[8 * 4];
176  a2 -= W4 * col[8 * 4];
177  a3 += W4 * col[8 * 4];
178  }
179 
180  if (col[8 * 6]) {
181  a0 += W6 * col[8 * 6];
182  a1 -= W2 * col[8 * 6];
183  a2 += W2 * col[8 * 6];
184  a3 -= W6 * col[8 * 6];
185  }
186 
187  if (col[8 * 1]) {
188  b0 = W1 * col[8 * 1];
189  b1 = W3 * col[8 * 1];
190  b2 = W5 * col[8 * 1];
191  b3 = W7 * col[8 * 1];
192  } else {
193  b0 = 0;
194  b1 = 0;
195  b2 = 0;
196  b3 = 0;
197  }
198 
199  if (col[8 * 3]) {
200  b0 += W3 * col[8 * 3];
201  b1 -= W7 * col[8 * 3];
202  b2 -= W1 * col[8 * 3];
203  b3 -= W5 * col[8 * 3];
204  }
205 
206  if (col[8 * 5]) {
207  b0 += W5 * col[8 * 5];
208  b1 -= W1 * col[8 * 5];
209  b2 += W7 * col[8 * 5];
210  b3 += W3 * col[8 * 5];
211  }
212 
213  if (col[8 * 7]) {
214  b0 += W7 * col[8 * 7];
215  b1 -= W5 * col[8 * 7];
216  b2 += W3 * col[8 * 7];
217  b3 -= W1 * col[8 * 7];
218  }
219 
220  col[8 * 0] = (a0 + b0) >> COL_SHIFT;
221  col[8 * 7] = (a0 - b0) >> COL_SHIFT;
222  col[8 * 1] = (a1 + b1) >> COL_SHIFT;
223  col[8 * 6] = (a1 - b1) >> COL_SHIFT;
224  col[8 * 2] = (a2 + b2) >> COL_SHIFT;
225  col[8 * 5] = (a2 - b2) >> COL_SHIFT;
226  col[8 * 3] = (a3 + b3) >> COL_SHIFT;
227  col[8 * 4] = (a3 - b3) >> COL_SHIFT;
228 }
229 
230 /* If all rows but the first one are zero after row transformation,
231  all rows will be identical after column transformation. */
232 static inline void idct_col2(DCTELEM *col)
233 {
234  int i;
235  uint64_t l, r;
236 
237  for (i = 0; i < 8; ++i) {
238  int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
239 
240  a0 *= W4;
241  col[i] = a0 >> COL_SHIFT;
242  }
243 
244  l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
245  stq(l, col + 2 * 4); stq(r, col + 3 * 4);
246  stq(l, col + 4 * 4); stq(r, col + 5 * 4);
247  stq(l, col + 6 * 4); stq(r, col + 7 * 4);
248  stq(l, col + 8 * 4); stq(r, col + 9 * 4);
249  stq(l, col + 10 * 4); stq(r, col + 11 * 4);
250  stq(l, col + 12 * 4); stq(r, col + 13 * 4);
251  stq(l, col + 14 * 4); stq(r, col + 15 * 4);
252 }
253 
255 {
256 
257  int i;
258  int rowsZero = 1; /* all rows except row 0 zero */
259  int rowsConstant = 1; /* all rows consist of a constant value */
260 
261  for (i = 0; i < 8; i++) {
262  int sparseness = idct_row(block + 8 * i);
263 
264  if (i > 0 && sparseness > 0)
265  rowsZero = 0;
266  if (sparseness == 2)
267  rowsConstant = 0;
268  }
269 
270  if (rowsZero) {
271  idct_col2(block);
272  } else if (rowsConstant) {
273  idct_col(block);
274  for (i = 0; i < 8; i += 2) {
275  uint64_t v = (uint16_t) block[0];
276  uint64_t w = (uint16_t) block[8];
277 
278  v |= v << 16;
279  w |= w << 16;
280  v |= v << 32;
281  w |= w << 32;
282  stq(v, block + 0 * 4);
283  stq(v, block + 1 * 4);
284  stq(w, block + 2 * 4);
285  stq(w, block + 3 * 4);
286  block += 4 * 4;
287  }
288  } else {
289  for (i = 0; i < 8; i++)
290  idct_col(block + i);
291  }
292 }
293 
/**
 * Inverse-transform block in place, then pass the result to
 * put_pixels_clamped_axp_p together with dest and line_size.
 *
 * @param dest       destination passed through to put_pixels_clamped_axp_p
 * @param line_size  passed through unchanged (presumably the byte stride of
 *                   dest -- confirm against put_pixels_clamped_axp_p)
 * @param block      64 coefficients; overwritten by the inverse transform
 */
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
299 
/**
 * Inverse-transform block in place, then pass the result to
 * add_pixels_clamped_axp_p together with dest and line_size.
 *
 * @param dest       destination passed through to add_pixels_clamped_axp_p
 * @param line_size  passed through unchanged (presumably the byte stride of
 *                   dest -- confirm against add_pixels_clamped_axp_p)
 * @param block      64 coefficients; overwritten by the inverse transform
 */
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}