FFmpeg
wmv2dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/attributes.h"
20 #include "libavutil/common.h"
21 #include "avcodec.h"
22 #include "idctdsp.h"
23 #include "mathops.h"
24 #include "wmv2dsp.h"
25 
/*
 * Fixed-point multipliers used by wmv2_idct_row() and wmv2_idct_col().
 * Wk is 2048 * sqrt(2) * cos(k * pi / 16) rounded to the nearest integer.
 */
#define W0 2048 /* plain 2048 scale, applied to coefficients 0 and 4 */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
34 
35 static void wmv2_idct_row(short * b)
36 {
37  int s1, s2;
38  int a0, a1, a2, a3, a4, a5, a6, a7;
39 
40  /* step 1 */
41  a1 = W1 * b[1] + W7 * b[7];
42  a7 = W7 * b[1] - W1 * b[7];
43  a5 = W5 * b[5] + W3 * b[3];
44  a3 = W3 * b[5] - W5 * b[3];
45  a2 = W2 * b[2] + W6 * b[6];
46  a6 = W6 * b[2] - W2 * b[6];
47  a0 = W0 * b[0] + W0 * b[4];
48  a4 = W0 * b[0] - W0 * b[4];
49 
50  /* step 2 */
51  s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8; // 1, 3, 5, 7
52  s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;
53 
54  /* step 3 */
55  b[0] = (a0 + a2 + a1 + a5 + (1 << 7)) >> 8;
56  b[1] = (a4 + a6 + s1 + (1 << 7)) >> 8;
57  b[2] = (a4 - a6 + s2 + (1 << 7)) >> 8;
58  b[3] = (a0 - a2 + a7 + a3 + (1 << 7)) >> 8;
59  b[4] = (a0 - a2 - a7 - a3 + (1 << 7)) >> 8;
60  b[5] = (a4 - a6 - s2 + (1 << 7)) >> 8;
61  b[6] = (a4 + a6 - s1 + (1 << 7)) >> 8;
62  b[7] = (a0 + a2 - a1 - a5 + (1 << 7)) >> 8;
63 }
64 
65 static void wmv2_idct_col(short * b)
66 {
67  int s1, s2;
68  int a0, a1, a2, a3, a4, a5, a6, a7;
69 
70  /* step 1, with extended precision */
71  a1 = (W1 * b[8 * 1] + W7 * b[8 * 7] + 4) >> 3;
72  a7 = (W7 * b[8 * 1] - W1 * b[8 * 7] + 4) >> 3;
73  a5 = (W5 * b[8 * 5] + W3 * b[8 * 3] + 4) >> 3;
74  a3 = (W3 * b[8 * 5] - W5 * b[8 * 3] + 4) >> 3;
75  a2 = (W2 * b[8 * 2] + W6 * b[8 * 6] + 4) >> 3;
76  a6 = (W6 * b[8 * 2] - W2 * b[8 * 6] + 4) >> 3;
77  a0 = (W0 * b[8 * 0] + W0 * b[8 * 4] ) >> 3;
78  a4 = (W0 * b[8 * 0] - W0 * b[8 * 4] ) >> 3;
79 
80  /* step 2 */
81  s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8;
82  s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;
83 
84  /* step 3 */
85  b[8 * 0] = (a0 + a2 + a1 + a5 + (1 << 13)) >> 14;
86  b[8 * 1] = (a4 + a6 + s1 + (1 << 13)) >> 14;
87  b[8 * 2] = (a4 - a6 + s2 + (1 << 13)) >> 14;
88  b[8 * 3] = (a0 - a2 + a7 + a3 + (1 << 13)) >> 14;
89 
90  b[8 * 4] = (a0 - a2 - a7 - a3 + (1 << 13)) >> 14;
91  b[8 * 5] = (a4 - a6 - s2 + (1 << 13)) >> 14;
92  b[8 * 6] = (a4 + a6 - s1 + (1 << 13)) >> 14;
93  b[8 * 7] = (a0 + a2 - a1 - a5 + (1 << 13)) >> 14;
94 }
95 
/**
 * Inverse-transform an 8x8 coefficient block in place and add the result
 * to an 8x8 area of the destination picture.
 *
 * The block is transformed row by row, then column by column. Each sum of
 * destination sample and residual goes through av_clip_uint8() before it
 * is stored (presumably a clamp to 0..255 -- see libavutil).
 *
 * @param dest      top-left destination sample
 * @param line_size distance between two destination rows
 * @param block     64 coefficients in row-major order; overwritten
 */
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int16_t *residual = block;
    uint8_t *out = dest;
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(&block[8 * row]);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(&block[col]);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            out[col] = av_clip_uint8(out[col] + residual[col]);
        out      += line_size;
        residual += 8;
    }
}
118 
/**
 * Inverse-transform an 8x8 coefficient block in place and store the result
 * into an 8x8 area of the destination picture.
 *
 * The block is transformed row by row, then column by column. Each output
 * value goes through av_clip_uint8() before it is stored (presumably a
 * clamp to 0..255 -- see libavutil).
 *
 * @param dest      top-left destination sample
 * @param line_size distance between two destination rows
 * @param block     64 coefficients in row-major order; overwritten
 */
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int16_t *pixels = block;
    uint8_t *out = dest;
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(&block[8 * row]);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(&block[col]);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            out[col] = av_clip_uint8(pixels[col]);
        out    += line_size;
        pixels += 8;
    }
}
141 
142 static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src,
143  int dstStride, int srcStride, int h)
144 {
145  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
146  int i;
147 
148  for (i = 0; i < h; i++) {
149  dst[0] = cm[(9 * (src[0] + src[1]) - (src[-1] + src[2]) + 8) >> 4];
150  dst[1] = cm[(9 * (src[1] + src[2]) - (src[0] + src[3]) + 8) >> 4];
151  dst[2] = cm[(9 * (src[2] + src[3]) - (src[1] + src[4]) + 8) >> 4];
152  dst[3] = cm[(9 * (src[3] + src[4]) - (src[2] + src[5]) + 8) >> 4];
153  dst[4] = cm[(9 * (src[4] + src[5]) - (src[3] + src[6]) + 8) >> 4];
154  dst[5] = cm[(9 * (src[5] + src[6]) - (src[4] + src[7]) + 8) >> 4];
155  dst[6] = cm[(9 * (src[6] + src[7]) - (src[5] + src[8]) + 8) >> 4];
156  dst[7] = cm[(9 * (src[7] + src[8]) - (src[6] + src[9]) + 8) >> 4];
157  dst += dstStride;
158  src += srcStride;
159  }
160 }
161 
162 static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src,
163  int dstStride, int srcStride, int w)
164 {
165  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
166  int i;
167 
168  for (i = 0; i < w; i++) {
169  const int src_1 = src[-srcStride];
170  const int src0 = src[0];
171  const int src1 = src[srcStride];
172  const int src2 = src[2 * srcStride];
173  const int src3 = src[3 * srcStride];
174  const int src4 = src[4 * srcStride];
175  const int src5 = src[5 * srcStride];
176  const int src6 = src[6 * srcStride];
177  const int src7 = src[7 * srcStride];
178  const int src8 = src[8 * srcStride];
179  const int src9 = src[9 * srcStride];
180  dst[0 * dstStride] = cm[(9 * (src0 + src1) - (src_1 + src2) + 8) >> 4];
181  dst[1 * dstStride] = cm[(9 * (src1 + src2) - (src0 + src3) + 8) >> 4];
182  dst[2 * dstStride] = cm[(9 * (src2 + src3) - (src1 + src4) + 8) >> 4];
183  dst[3 * dstStride] = cm[(9 * (src3 + src4) - (src2 + src5) + 8) >> 4];
184  dst[4 * dstStride] = cm[(9 * (src4 + src5) - (src3 + src6) + 8) >> 4];
185  dst[5 * dstStride] = cm[(9 * (src5 + src6) - (src4 + src7) + 8) >> 4];
186  dst[6 * dstStride] = cm[(9 * (src6 + src7) - (src5 + src8) + 8) >> 4];
187  dst[7 * dstStride] = cm[(9 * (src7 + src8) - (src6 + src9) + 8) >> 4];
188  src++;
189  dst++;
190  }
191 }
192 
/**
 * Horizontally filter an 8x8 block into a temporary buffer, then hand the
 * unfiltered source and the filtered block to ff_put_pixels8_l2_8(), which
 * writes dst (presumably an average of its two inputs -- see qpeldsp.c).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t filtered[8 * 8]; /* 8 rows, tightly packed at 8 bytes per row */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, src, filtered, stride, stride, 8, 8);
}
200 
/**
 * Horizontally filter 8 rows of src straight into dst.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int rows = 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
205 
/**
 * Horizontally filter an 8x8 block into a temporary buffer, then hand the
 * source shifted one sample to the right and the filtered block to
 * ff_put_pixels8_l2_8(), which writes dst (presumably an average of its
 * two inputs -- see qpeldsp.c).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t filtered[8 * 8]; /* 8 rows, tightly packed at 8 bytes per row */
    const uint8_t *right = src + 1;

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, right, filtered, stride, stride, 8, 8);
}
213 
/**
 * Vertically filter 8 columns of src straight into dst.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    const int columns = 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
218 
/**
 * Build two intermediate 8x8 blocks and combine them into dst:
 *  - the source filtered vertically only;
 *  - the source filtered horizontally (11 rows, starting one row above src)
 *    and then vertically.
 * The combination is done by ff_put_pixels8_l2_8() (presumably an average
 * of its two inputs -- see qpeldsp.c).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, 8 bytes each */
    uint8_t v_only[8 * 8];
    uint8_t h_then_v[8 * 8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);
    /* skip the first stored row: the vertical filter reads one row above
     * its source pointer */
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, h_then_v, stride, 8, 8, 8);
}
230 
/**
 * Build two intermediate 8x8 blocks and combine them into dst:
 *  - the source shifted one sample to the right, filtered vertically only;
 *  - the source filtered horizontally (11 rows, starting one row above src)
 *    and then vertically.
 * The combination is done by ff_put_pixels8_l2_8() (presumably an average
 * of its two inputs -- see qpeldsp.c).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, 8 bytes each */
    uint8_t v_only[8 * 8];
    uint8_t h_then_v[8 * 8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src + 1, 8, stride, 8);
    /* skip the first stored row: the vertical filter reads one row above
     * its source pointer */
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, h_then_v, stride, 8, 8, 8);
}
242 
/**
 * Filter the source horizontally into a temporary buffer (11 rows, starting
 * one row above src), then filter that buffer vertically straight into dst.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row distance used for both dst and src
 */
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, 8 bytes each */

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    /* skip the first stored row: the vertical filter reads one row above
     * its source pointer */
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);
}
250 
252 {
256 
265 
266  if (ARCH_MIPS)
268 }
#define a0
Definition: regdef.h:46
#define MAX_NEG_CROP
Definition: mathops.h:31
#define a1
Definition: regdef.h:47
#define src
Definition: vp8dsp.c:254
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: qpeldsp.c:730
Macro definitions for various function/variable attributes.
#define W6
Definition: wmv2dsp.c:32
#define a3
Definition: regdef.h:49
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:96
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
#define av_cold
Definition: attributes.h:82
int idct_perm
Definition: wmv2dsp.h:32
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:243
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
Definition: undefined.txt:32
static void wmv2_idct_col(short *b)
Definition: wmv2dsp.c:65
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:193
#define cm
Definition: dvbsubdec.c:37
static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: wmv2dsp.c:142
#define U(x)
Definition: vp56_arith.h:37
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:214
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define W5
Definition: wmv2dsp.c:31
#define W0
Definition: wmv2dsp.c:26
#define s2
Definition: regdef.h:39
void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:703
#define W1
Definition: wmv2dsp.c:27
#define W7
Definition: wmv2dsp.c:33
av_cold void ff_wmv2dsp_init(WMV2DSPContext *c)
Definition: wmv2dsp.c:251
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:231
qpel_mc_func put_mspel_pixels_tab[8]
Definition: wmv2dsp.h:30
#define b
Definition: input.c:41
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:119
uint8_t w
Definition: llviddspenc.c:38
#define a2
Definition: regdef.h:48
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:219
av_cold void ff_wmv2dsp_init_mips(WMV2DSPContext *c)
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:201
#define src1
Definition: h264pred.c:139
#define a5
Definition: regdef.h:51
Libavcodec external API header.
#define src0
Definition: h264pred.c:138
#define s1
Definition: regdef.h:38
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
int
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:206
common internal and external API header
static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int w)
Definition: wmv2dsp.c:162
#define ff_crop_tab
#define a4
Definition: regdef.h:50
void(* idct_put)(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.h:28
static void wmv2_idct_row(short *b)
Definition: wmv2dsp.c:35
#define W3
Definition: wmv2dsp.c:29
#define W2
Definition: wmv2dsp.c:28
void(* idct_add)(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.h:27