FFmpeg
dcadct.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2016 foo86
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/common.h"
22 
23 #include "dcadct.h"
24 #include "dcamath.h"
25 
26 static void sum_a(const int *input, int *output, int len)
27 {
28  int i;
29 
30  for (i = 0; i < len; i++)
31  output[i] = input[2 * i] + input[2 * i + 1];
32 }
33 
34 static void sum_b(const int *input, int *output, int len)
35 {
36  int i;
37 
38  output[0] = input[0];
39  for (i = 1; i < len; i++)
40  output[i] = input[2 * i] + input[2 * i - 1];
41 }
42 
43 static void sum_c(const int *input, int *output, int len)
44 {
45  int i;
46 
47  for (i = 0; i < len; i++)
48  output[i] = input[2 * i];
49 }
50 
51 static void sum_d(const int *input, int *output, int len)
52 {
53  int i;
54 
55  output[0] = input[1];
56  for (i = 1; i < len; i++)
57  output[i] = input[2 * i - 1] + input[2 * i + 1];
58 }
59 
60 static void dct_a(const int *input, int *output)
61 {
62  static const int cos_mod[8][8] = {
63  { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 },
64  { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
65  { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 },
66  { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 },
67  { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 },
68  { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 },
69  { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 },
70  { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 }
71  };
72 
73  int i, j;
74 
75  for (i = 0; i < 8; i++) {
76  int64_t res = 0;
77  for (j = 0; j < 8; j++)
78  res += (int64_t)cos_mod[i][j] * input[j];
79  output[i] = norm23(res);
80  }
81 }
82 
83 static void dct_b(const int *input, int *output)
84 {
85  static const int cos_mod[8][7] = {
86  { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 },
87  { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
88  { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 },
89  { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 },
90  { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 },
91  { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 },
92  { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 },
93  { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 }
94  };
95 
96  int i, j;
97 
98  for (i = 0; i < 8; i++) {
99  int64_t res = input[0] * (INT64_C(1) << 23);
100  for (j = 0; j < 7; j++)
101  res += (int64_t)cos_mod[i][j] * input[1 + j];
102  output[i] = norm23(res);
103  }
104 }
105 
106 static void mod_a(const int *input, int *output)
107 {
108  static const int cos_mod[16] = {
109  4199362, 4240198, 4323885, 4454708,
110  4639772, 4890013, 5221943, 5660703,
111  -6245623, -7040975, -8158494, -9809974,
112  -12450076, -17261920, -28585092, -85479984
113  };
114 
115  int i, k;
116 
117  for (i = 0; i < 8; i++)
118  output[i] = mul23(cos_mod[i], input[i] + input[8 + i]);
119 
120  for (i = 8, k = 7; i < 16; i++, k--)
121  output[i] = mul23(cos_mod[i], input[k] - input[8 + k]);
122 }
123 
124 static void mod_b(int *input, int *output)
125 {
126  static const int cos_mod[8] = {
127  4214598, 4383036, 4755871, 5425934,
128  6611520, 8897610, 14448934, 42791536
129  };
130 
131  int i, k;
132 
133  for (i = 0; i < 8; i++)
134  input[8 + i] = mul23(cos_mod[i], input[8 + i]);
135 
136  for (i = 0; i < 8; i++)
137  output[i] = input[i] + input[8 + i];
138 
139  for (i = 8, k = 7; i < 16; i++, k--)
140  output[i] = input[k] - input[8 + k];
141 }
142 
143 static void mod_c(const int *input, int *output)
144 {
145  static const int cos_mod[32] = {
146  1048892, 1051425, 1056522, 1064244,
147  1074689, 1087987, 1104313, 1123884,
148  1146975, 1173922, 1205139, 1241133,
149  1282529, 1330095, 1384791, 1447815,
150  -1520688, -1605358, -1704360, -1821051,
151  -1959964, -2127368, -2332183, -2587535,
152  -2913561, -3342802, -3931480, -4785806,
153  -6133390, -8566050, -14253820, -42727120
154  };
155 
156  int i, k;
157 
158  for (i = 0; i < 16; i++)
159  output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
160 
161  for (i = 16, k = 15; i < 32; i++, k--)
162  output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
163 }
164 
165 static void clp_v(int *input, int len)
166 {
167  int i;
168 
169  for (i = 0; i < len; i++)
170  input[i] = clip23(input[i]);
171 }
172 
173 static void imdct_half_32(int32_t *output, const int32_t *input)
174 {
175  int buf_a[32], buf_b[32];
176  int i, k, mag, shift, round;
177 
178  mag = 0;
179  for (i = 0; i < 32; i++)
180  mag += abs(input[i]);
181 
182  shift = mag > 0x400000 ? 2 : 0;
183  round = shift > 0 ? 1 << (shift - 1) : 0;
184 
185  for (i = 0; i < 32; i++)
186  buf_a[i] = (input[i] + round) >> shift;
187 
188  sum_a(buf_a, buf_b + 0, 16);
189  sum_b(buf_a, buf_b + 16, 16);
190  clp_v(buf_b, 32);
191 
192  sum_a(buf_b + 0, buf_a + 0, 8);
193  sum_b(buf_b + 0, buf_a + 8, 8);
194  sum_c(buf_b + 16, buf_a + 16, 8);
195  sum_d(buf_b + 16, buf_a + 24, 8);
196  clp_v(buf_a, 32);
197 
198  dct_a(buf_a + 0, buf_b + 0);
199  dct_b(buf_a + 8, buf_b + 8);
200  dct_b(buf_a + 16, buf_b + 16);
201  dct_b(buf_a + 24, buf_b + 24);
202  clp_v(buf_b, 32);
203 
204  mod_a(buf_b + 0, buf_a + 0);
205  mod_b(buf_b + 16, buf_a + 16);
206  clp_v(buf_a, 32);
207 
208  mod_c(buf_a, buf_b);
209 
210  for (i = 0; i < 32; i++)
211  buf_b[i] = clip23(buf_b[i] * (1 << shift));
212 
213  for (i = 0, k = 31; i < 16; i++, k--) {
214  output[ i] = clip23(buf_b[i] - buf_b[k]);
215  output[16 + i] = clip23(buf_b[i] + buf_b[k]);
216  }
217 }
218 
219 static void mod64_a(const int *input, int *output)
220 {
221  static const int cos_mod[32] = {
222  4195568, 4205700, 4226086, 4256977,
223  4298755, 4351949, 4417251, 4495537,
224  4587901, 4695690, 4820557, 4964534,
225  5130115, 5320382, 5539164, 5791261,
226  -6082752, -6421430, -6817439, -7284203,
227  -7839855, -8509474, -9328732, -10350140,
228  -11654242, -13371208, -15725922, -19143224,
229  -24533560, -34264200, -57015280, -170908480
230  };
231 
232  int i, k;
233 
234  for (i = 0; i < 16; i++)
235  output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
236 
237  for (i = 16, k = 15; i < 32; i++, k--)
238  output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
239 }
240 
241 static void mod64_b(int *input, int *output)
242 {
243  static const int cos_mod[16] = {
244  4199362, 4240198, 4323885, 4454708,
245  4639772, 4890013, 5221943, 5660703,
246  6245623, 7040975, 8158494, 9809974,
247  12450076, 17261920, 28585092, 85479984
248  };
249 
250  int i, k;
251 
252  for (i = 0; i < 16; i++)
253  input[16 + i] = mul23(cos_mod[i], input[16 + i]);
254 
255  for (i = 0; i < 16; i++)
256  output[i] = input[i] + input[16 + i];
257 
258  for (i = 16, k = 15; i < 32; i++, k--)
259  output[i] = input[k] - input[16 + k];
260 }
261 
262 static void mod64_c(const int *input, int *output)
263 {
264  static const int cos_mod[64] = {
265  741511, 741958, 742853, 744199,
266  746001, 748262, 750992, 754197,
267  757888, 762077, 766777, 772003,
268  777772, 784105, 791021, 798546,
269  806707, 815532, 825054, 835311,
270  846342, 858193, 870912, 884554,
271  899181, 914860, 931667, 949686,
272  969011, 989747, 1012012, 1035941,
273  -1061684, -1089412, -1119320, -1151629,
274  -1186595, -1224511, -1265719, -1310613,
275  -1359657, -1413400, -1472490, -1537703,
276  -1609974, -1690442, -1780506, -1881904,
277  -1996824, -2128058, -2279225, -2455101,
278  -2662128, -2909200, -3208956, -3579983,
279  -4050785, -4667404, -5509372, -6726913,
280  -8641940, -12091426, -20144284, -60420720
281  };
282 
283  int i, k;
284 
285  for (i = 0; i < 32; i++)
286  output[i] = mul23(cos_mod[i], input[i] + input[32 + i]);
287 
288  for (i = 32, k = 31; i < 64; i++, k--)
289  output[i] = mul23(cos_mod[i], input[k] - input[32 + k]);
290 }
291 
292 static void imdct_half_64(int32_t *output, const int32_t *input)
293 {
294  int buf_a[64], buf_b[64];
295  int i, k, mag, shift, round;
296 
297  mag = 0;
298  for (i = 0; i < 64; i++)
299  mag += abs(input[i]);
300 
301  shift = mag > 0x400000 ? 2 : 0;
302  round = shift > 0 ? 1 << (shift - 1) : 0;
303 
304  for (i = 0; i < 64; i++)
305  buf_a[i] = (input[i] + round) >> shift;
306 
307  sum_a(buf_a, buf_b + 0, 32);
308  sum_b(buf_a, buf_b + 32, 32);
309  clp_v(buf_b, 64);
310 
311  sum_a(buf_b + 0, buf_a + 0, 16);
312  sum_b(buf_b + 0, buf_a + 16, 16);
313  sum_c(buf_b + 32, buf_a + 32, 16);
314  sum_d(buf_b + 32, buf_a + 48, 16);
315  clp_v(buf_a, 64);
316 
317  sum_a(buf_a + 0, buf_b + 0, 8);
318  sum_b(buf_a + 0, buf_b + 8, 8);
319  sum_c(buf_a + 16, buf_b + 16, 8);
320  sum_d(buf_a + 16, buf_b + 24, 8);
321  sum_c(buf_a + 32, buf_b + 32, 8);
322  sum_d(buf_a + 32, buf_b + 40, 8);
323  sum_c(buf_a + 48, buf_b + 48, 8);
324  sum_d(buf_a + 48, buf_b + 56, 8);
325  clp_v(buf_b, 64);
326 
327  dct_a(buf_b + 0, buf_a + 0);
328  dct_b(buf_b + 8, buf_a + 8);
329  dct_b(buf_b + 16, buf_a + 16);
330  dct_b(buf_b + 24, buf_a + 24);
331  dct_b(buf_b + 32, buf_a + 32);
332  dct_b(buf_b + 40, buf_a + 40);
333  dct_b(buf_b + 48, buf_a + 48);
334  dct_b(buf_b + 56, buf_a + 56);
335  clp_v(buf_a, 64);
336 
337  mod_a(buf_a + 0, buf_b + 0);
338  mod_b(buf_a + 16, buf_b + 16);
339  mod_b(buf_a + 32, buf_b + 32);
340  mod_b(buf_a + 48, buf_b + 48);
341  clp_v(buf_b, 64);
342 
343  mod64_a(buf_b + 0, buf_a + 0);
344  mod64_b(buf_b + 32, buf_a + 32);
345  clp_v(buf_a, 64);
346 
347  mod64_c(buf_a, buf_b);
348 
349  for (i = 0; i < 64; i++)
350  buf_b[i] = clip23(buf_b[i] * (1 << shift));
351 
352  for (i = 0, k = 63; i < 32; i++, k--) {
353  output[ i] = clip23(buf_b[i] - buf_b[k]);
354  output[32 + i] = clip23(buf_b[i] + buf_b[k]);
355  }
356 }
357 
359 {
360  c->imdct_half[0] = imdct_half_32;
361  c->imdct_half[1] = imdct_half_64;
362 }
static int shift(int a, int b)
Definition: sonic.c:82
static void dct_a(const int *input, int *output)
Definition: dcadct.c:60
static void sum_d(const int *input, int *output, int len)
Definition: dcadct.c:51
static void imdct_half_64(int32_t *output, const int32_t *input)
Definition: dcadct.c:292
static void sum_c(const int *input, int *output, int len)
Definition: dcadct.c:43
static void mod_a(const int *input, int *output)
Definition: dcadct.c:106
static int32_t clip23(int32_t a)
Definition: dcamath.h:54
static int32_t mul23(int32_t a, int32_t b)
Definition: dcamath.h:50
#define av_cold
Definition: attributes.h:82
static void sum_a(const int *input, int *output, int len)
Definition: dcadct.c:26
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
static void sum_b(const int *input, int *output, int len)
Definition: dcadct.c:34
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
static void clp_v(int *input, int len)
Definition: dcadct.c:165
static void mod_b(int *input, int *output)
Definition: dcadct.c:124
static void mod64_a(const int *input, int *output)
Definition: dcadct.c:219
static av_always_inline av_const double round(double x)
Definition: libm.h:444
int32_t
static void imdct_half_32(int32_t *output, const int32_t *input)
Definition: dcadct.c:173
#define abs(x)
Definition: cuda_runtime.h:35
av_cold void ff_dcadct_init(DCADCTContext *c)
Definition: dcadct.c:358
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
static void mod_c(const int *input, int *output)
Definition: dcadct.c:143
common internal and external API header
static void mod64_c(const int *input, int *output)
Definition: dcadct.c:262
int len
static void mod64_b(int *input, int *output)
Definition: dcadct.c:241
void(* imdct_half[2])(int32_t *output, const int32_t *input)
Definition: dcadct.h:27
static int32_t norm23(int64_t a)
Definition: dcamath.h:44
static void dct_b(const int *input, int *output)
Definition: dcadct.c:83