FFmpeg
wmv2dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/attributes.h"
20 #include "libavutil/common.h"
21 #include "avcodec.h"
22 #include "idctdsp.h"
23 #include "mathops.h"
24 #include "wmv2dsp.h"
25 
/* Fixed-point multipliers used by wmv2_idct_row() and wmv2_idct_col().
 * Per the notes beside each value, Wk is 2048 * sqrt(2) * cos(k * pi / 16)
 * rounded to an integer. */
#define W0 2048 /* same value as W4; multiplies the 0 and 4 inputs */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
34 
35 static void wmv2_idct_row(short * b)
36 {
37  int s1, s2;
38  int a0, a1, a2, a3, a4, a5, a6, a7;
39 
40  /* step 1 */
41  a1 = W1 * b[1] + W7 * b[7];
42  a7 = W7 * b[1] - W1 * b[7];
43  a5 = W5 * b[5] + W3 * b[3];
44  a3 = W3 * b[5] - W5 * b[3];
45  a2 = W2 * b[2] + W6 * b[6];
46  a6 = W6 * b[2] - W2 * b[6];
47  a0 = W0 * b[0] + W0 * b[4];
48  a4 = W0 * b[0] - W0 * b[4];
49 
50  /* step 2 */
51  s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8; // 1, 3, 5, 7
52  s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;
53 
54  /* step 3 */
55  b[0] = (a0 + a2 + a1 + a5 + (1 << 7)) >> 8;
56  b[1] = (a4 + a6 + s1 + (1 << 7)) >> 8;
57  b[2] = (a4 - a6 + s2 + (1 << 7)) >> 8;
58  b[3] = (a0 - a2 + a7 + a3 + (1 << 7)) >> 8;
59  b[4] = (a0 - a2 - a7 - a3 + (1 << 7)) >> 8;
60  b[5] = (a4 - a6 - s2 + (1 << 7)) >> 8;
61  b[6] = (a4 + a6 - s1 + (1 << 7)) >> 8;
62  b[7] = (a0 + a2 - a1 - a5 + (1 << 7)) >> 8;
63 }
64 
65 static void wmv2_idct_col(short * b)
66 {
67  int s1, s2;
68  int a0, a1, a2, a3, a4, a5, a6, a7;
69 
70  /* step 1, with extended precision */
71  a1 = (W1 * b[8 * 1] + W7 * b[8 * 7] + 4) >> 3;
72  a7 = (W7 * b[8 * 1] - W1 * b[8 * 7] + 4) >> 3;
73  a5 = (W5 * b[8 * 5] + W3 * b[8 * 3] + 4) >> 3;
74  a3 = (W3 * b[8 * 5] - W5 * b[8 * 3] + 4) >> 3;
75  a2 = (W2 * b[8 * 2] + W6 * b[8 * 6] + 4) >> 3;
76  a6 = (W6 * b[8 * 2] - W2 * b[8 * 6] + 4) >> 3;
77  a0 = (W0 * b[8 * 0] + W0 * b[8 * 4] ) >> 3;
78  a4 = (W0 * b[8 * 0] - W0 * b[8 * 4] ) >> 3;
79 
80  /* step 2 */
81  s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8;
82  s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;
83 
84  /* step 3 */
85  b[8 * 0] = (a0 + a2 + a1 + a5 + (1 << 13)) >> 14;
86  b[8 * 1] = (a4 + a6 + s1 + (1 << 13)) >> 14;
87  b[8 * 2] = (a4 - a6 + s2 + (1 << 13)) >> 14;
88  b[8 * 3] = (a0 - a2 + a7 + a3 + (1 << 13)) >> 14;
89 
90  b[8 * 4] = (a0 - a2 - a7 - a3 + (1 << 13)) >> 14;
91  b[8 * 5] = (a4 - a6 - s2 + (1 << 13)) >> 14;
92  b[8 * 6] = (a4 + a6 - s1 + (1 << 13)) >> 14;
93  b[8 * 7] = (a0 + a2 - a1 - a5 + (1 << 13)) >> 14;
94 }
95 
/**
 * Inverse-transform an 8x8 coefficient block and add it to the destination.
 *
 * The block is transformed in place (all rows first, then all columns), so
 * its contents are overwritten. Each sum of destination pixel and transformed
 * value is passed through av_clip_uint8() before being stored.
 *
 * @param dest      top-left destination pixel, updated in place
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients stored row after row, 8 per row
 */
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + block[col]);
        dest  += line_size;
        block += 8;
    }
}
118 
/**
 * Inverse-transform an 8x8 coefficient block and store it to the destination.
 *
 * The block is transformed in place (all rows first, then all columns), so
 * its contents are overwritten. Each transformed value is passed through
 * av_clip_uint8() before being stored; previous destination contents are
 * not read.
 *
 * @param dest      top-left destination pixel
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients stored row after row, 8 per row
 */
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(block[col]);
        dest  += line_size;
        block += 8;
    }
}
141 
142 static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src,
143  int dstStride, int srcStride, int h)
144 {
145  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
146  int i;
147 
148  for (i = 0; i < h; i++) {
149  dst[0] = cm[(9 * (src[0] + src[1]) - (src[-1] + src[2]) + 8) >> 4];
150  dst[1] = cm[(9 * (src[1] + src[2]) - (src[0] + src[3]) + 8) >> 4];
151  dst[2] = cm[(9 * (src[2] + src[3]) - (src[1] + src[4]) + 8) >> 4];
152  dst[3] = cm[(9 * (src[3] + src[4]) - (src[2] + src[5]) + 8) >> 4];
153  dst[4] = cm[(9 * (src[4] + src[5]) - (src[3] + src[6]) + 8) >> 4];
154  dst[5] = cm[(9 * (src[5] + src[6]) - (src[4] + src[7]) + 8) >> 4];
155  dst[6] = cm[(9 * (src[6] + src[7]) - (src[5] + src[8]) + 8) >> 4];
156  dst[7] = cm[(9 * (src[7] + src[8]) - (src[6] + src[9]) + 8) >> 4];
157  dst += dstStride;
158  src += srcStride;
159  }
160 }
161 
162 static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src,
163  int dstStride, int srcStride, int w)
164 {
165  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
166  int i;
167 
168  for (i = 0; i < w; i++) {
169  const int src_1 = src[-srcStride];
170  const int src0 = src[0];
171  const int src1 = src[srcStride];
172  const int src2 = src[2 * srcStride];
173  const int src3 = src[3 * srcStride];
174  const int src4 = src[4 * srcStride];
175  const int src5 = src[5 * srcStride];
176  const int src6 = src[6 * srcStride];
177  const int src7 = src[7 * srcStride];
178  const int src8 = src[8 * srcStride];
179  const int src9 = src[9 * srcStride];
180  dst[0 * dstStride] = cm[(9 * (src0 + src1) - (src_1 + src2) + 8) >> 4];
181  dst[1 * dstStride] = cm[(9 * (src1 + src2) - (src0 + src3) + 8) >> 4];
182  dst[2 * dstStride] = cm[(9 * (src2 + src3) - (src1 + src4) + 8) >> 4];
183  dst[3 * dstStride] = cm[(9 * (src3 + src4) - (src2 + src5) + 8) >> 4];
184  dst[4 * dstStride] = cm[(9 * (src4 + src5) - (src3 + src6) + 8) >> 4];
185  dst[5 * dstStride] = cm[(9 * (src5 + src6) - (src4 + src7) + 8) >> 4];
186  dst[6 * dstStride] = cm[(9 * (src6 + src7) - (src5 + src8) + 8) >> 4];
187  dst[7 * dstStride] = cm[(9 * (src7 + src8) - (src6 + src9) + 8) >> 4];
188  src++;
189  dst++;
190  }
191 }
192 
/**
 * 8x8 prediction for table slot 1 of put_mspel_pixels_tab.
 *
 * Filters the block horizontally into a temporary and hands that temporary
 * together with the unfiltered source at src to ff_put_pixels8_l2_8(),
 * which writes dst (presumably a rounded average of its two inputs --
 * confirm against qpeldsp.c).
 */
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t filtered[8 * 8]; /* 8 rows of 8 pixels, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, src, filtered, stride, stride, 8, 8);
}
200 
/**
 * 8x8 prediction for table slot 2 of put_mspel_pixels_tab.
 *
 * Writes the horizontally filtered block straight to dst: 8 rows, with
 * source and destination both using the picture stride. An empty body here
 * would leave dst untouched for every block that selects this slot.
 */
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
205 
/**
 * 8x8 prediction for table slot 3 of put_mspel_pixels_tab.
 *
 * Filters the block horizontally into a temporary and hands that temporary
 * together with the unfiltered source one pixel to the right (src + 1) to
 * ff_put_pixels8_l2_8(), which writes dst (presumably a rounded average of
 * its two inputs -- confirm against qpeldsp.c).
 */
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t filtered[8 * 8]; /* 8 rows of 8 pixels, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, src + 1, filtered, stride, stride, 8, 8);
}
213 
/**
 * 8x8 prediction for table slot 4 of put_mspel_pixels_tab.
 *
 * Writes the vertically filtered block straight to dst: 8 columns, with
 * source and destination both using the picture stride. An empty body here
 * would leave dst untouched for every block that selects this slot.
 */
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
218 
/**
 * 8x8 prediction for table slot 5 of put_mspel_pixels_tab.
 *
 * Builds two intermediates and passes both to ff_put_pixels8_l2_8(), which
 * writes dst (presumably a rounded average of its two inputs -- confirm
 * against qpeldsp.c):
 *  - the source filtered vertically only, starting at src;
 *  - the source filtered horizontally, then vertically.
 */
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, pitch 8 */
    uint8_t v_only[8 * 8];
    uint8_t hv[8 * 8];

    /* 11 rows starting one row above src, so that row 1 of h_rows lines up
     * with src and the vertical filter has the rows above and below it */
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hv, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, hv, stride, 8, 8, 8);
}
230 
/**
 * 8x8 prediction for table slot 7 of put_mspel_pixels_tab.
 *
 * Builds two intermediates and passes both to ff_put_pixels8_l2_8(), which
 * writes dst (presumably a rounded average of its two inputs -- confirm
 * against qpeldsp.c):
 *  - the source filtered vertically only, starting one pixel to the right
 *    (src + 1);
 *  - the source filtered horizontally, then vertically.
 */
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, pitch 8 */
    uint8_t v_only[8 * 8];
    uint8_t hv[8 * 8];

    /* 11 rows starting one row above src, so that row 1 of h_rows lines up
     * with src and the vertical filter has the rows above and below it */
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hv, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, hv, stride, 8, 8, 8);
}
242 
/**
 * 8x8 prediction for table slot 6 of put_mspel_pixels_tab.
 *
 * Filters horizontally into a temporary, then filters that temporary
 * vertically straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[8 * 11]; /* 11 horizontally filtered rows, pitch 8 */

    /* 11 rows starting one row above src, so that row 1 of h_rows lines up
     * with src and the vertical filter has the rows above and below it */
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);
}
250 
252 {
253  c->idct_add = wmv2_idct_add_c;
254  c->idct_put = wmv2_idct_put_c;
255  c->idct_perm = FF_IDCT_PERM_NONE;
256 
257  c->put_mspel_pixels_tab[0] = ff_put_pixels8x8_c;
258  c->put_mspel_pixels_tab[1] = put_mspel8_mc10_c;
259  c->put_mspel_pixels_tab[2] = put_mspel8_mc20_c;
260  c->put_mspel_pixels_tab[3] = put_mspel8_mc30_c;
261  c->put_mspel_pixels_tab[4] = put_mspel8_mc02_c;
262  c->put_mspel_pixels_tab[5] = put_mspel8_mc12_c;
263  c->put_mspel_pixels_tab[6] = put_mspel8_mc22_c;
264  c->put_mspel_pixels_tab[7] = put_mspel8_mc32_c;
265 
266  if (ARCH_MIPS)
268 }
stride
int stride
Definition: mace.c:144
wmv2_idct_col
static void wmv2_idct_col(short *b)
Definition: wmv2dsp.c:65
wmv2_idct_add_c
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:96
w
uint8_t w
Definition: llviddspenc.c:38
put_mspel8_mc10_c
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:193
b
#define b
Definition: input.c:41
ff_put_pixels8_l2_8
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: qpeldsp.c:730
W6
#define W6
Definition: wmv2dsp.c:32
wmv2_mspel8_h_lowpass
static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: wmv2dsp.c:142
ff_crop_tab
#define ff_crop_tab
Definition: motionpixels_tablegen.c:26
put_mspel8_mc22_c
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:243
put_mspel8_mc02_c
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:214
U
#define U(x)
Definition: vp56_arith.h:37
ff_wmv2dsp_init_mips
av_cold void ff_wmv2dsp_init_mips(WMV2DSPContext *c)
Definition: wmv2dsp_init_mips.c:33
src
#define src
Definition: vp8dsp.c:254
a1
#define a1
Definition: regdef.h:47
W0
#define W0
Definition: wmv2dsp.c:26
ff_put_pixels8x8_c
void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:703
av_cold
#define av_cold
Definition: attributes.h:84
s1
#define s1
Definition: regdef.h:38
wmv2_idct_put_c
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:119
a4
#define a4
Definition: regdef.h:50
W5
#define W5
Definition: wmv2dsp.c:31
put_mspel8_mc32_c
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:231
wmv2dsp.h
mathops.h
ff_wmv2dsp_init
av_cold void ff_wmv2dsp_init(WMV2DSPContext *c)
Definition: wmv2dsp.c:251
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] (excerpt truncated)
Definition: undefined.txt:32
put_mspel8_mc20_c
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:201
s2
#define s2
Definition: regdef.h:39
FF_IDCT_PERM_NONE
@ FF_IDCT_PERM_NONE
Definition: idctdsp.h:38
W1
#define W1
Definition: wmv2dsp.c:27
W7
#define W7
Definition: wmv2dsp.c:33
put_mspel8_mc12_c
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:219
attributes.h
put_mspel8_mc30_c
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:206
a0
#define a0
Definition: regdef.h:46
src0
#define src0
Definition: h264pred.c:138
WMV2DSPContext
Definition: wmv2dsp.h:26
src1
#define src1
Definition: h264pred.c:139
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
a2
#define a2
Definition: regdef.h:48
common.h
uint8_t
uint8_t
Definition: audio_convert.c:194
idctdsp.h
wmv2_idct_row
static void wmv2_idct_row(short *b)
Definition: wmv2dsp.c:35
avcodec.h
a5
#define a5
Definition: regdef.h:51
W3
#define W3
Definition: wmv2dsp.c:29
cm
#define cm
Definition: dvbsubdec.c:37
wmv2_mspel8_v_lowpass
static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int w)
Definition: wmv2dsp.c:162
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
W2
#define W2
Definition: wmv2dsp.c:28
h
h
Definition: vp9dsp_template.c:2038
MAX_NEG_CROP
#define MAX_NEG_CROP
Definition: mathops.h:31
int
int
Definition: ffmpeg_filter.c:191
a3
#define a3
Definition: regdef.h:49