FFmpeg
vp9dsp_template.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/common.h"
25 #include "bit_depth_template.c"
26 #include "vp9dsp.h"
27 
28 #if BIT_DEPTH != 12
29 
30 // FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
31 // back with h264pred.[ch]
32 
33 static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
34  const uint8_t *left, const uint8_t *_top)
35 {
36  pixel *dst = (pixel *) _dst;
37  const pixel *top = (const pixel *) _top;
38  pixel4 p4 = AV_RN4PA(top);
39 
40  stride /= sizeof(pixel);
41  AV_WN4PA(dst + stride * 0, p4);
42  AV_WN4PA(dst + stride * 1, p4);
43  AV_WN4PA(dst + stride * 2, p4);
44  AV_WN4PA(dst + stride * 3, p4);
45 }
46 
47 static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
48  const uint8_t *left, const uint8_t *_top)
49 {
50  pixel *dst = (pixel *) _dst;
51  const pixel *top = (const pixel *) _top;
52  pixel4 p4a = AV_RN4PA(top + 0);
53  pixel4 p4b = AV_RN4PA(top + 4);
54  int y;
55 
56  stride /= sizeof(pixel);
57  for (y = 0; y < 8; y++) {
58  AV_WN4PA(dst + 0, p4a);
59  AV_WN4PA(dst + 4, p4b);
60  dst += stride;
61  }
62 }
63 
64 static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
65  const uint8_t *left, const uint8_t *_top)
66 {
67  pixel *dst = (pixel *) _dst;
68  const pixel *top = (const pixel *) _top;
69  pixel4 p4a = AV_RN4PA(top + 0);
70  pixel4 p4b = AV_RN4PA(top + 4);
71  pixel4 p4c = AV_RN4PA(top + 8);
72  pixel4 p4d = AV_RN4PA(top + 12);
73  int y;
74 
75  stride /= sizeof(pixel);
76  for (y = 0; y < 16; y++) {
77  AV_WN4PA(dst + 0, p4a);
78  AV_WN4PA(dst + 4, p4b);
79  AV_WN4PA(dst + 8, p4c);
80  AV_WN4PA(dst + 12, p4d);
81  dst += stride;
82  }
83 }
84 
85 static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
86  const uint8_t *left, const uint8_t *_top)
87 {
88  pixel *dst = (pixel *) _dst;
89  const pixel *top = (const pixel *) _top;
90  pixel4 p4a = AV_RN4PA(top + 0);
91  pixel4 p4b = AV_RN4PA(top + 4);
92  pixel4 p4c = AV_RN4PA(top + 8);
93  pixel4 p4d = AV_RN4PA(top + 12);
94  pixel4 p4e = AV_RN4PA(top + 16);
95  pixel4 p4f = AV_RN4PA(top + 20);
96  pixel4 p4g = AV_RN4PA(top + 24);
97  pixel4 p4h = AV_RN4PA(top + 28);
98  int y;
99 
100  stride /= sizeof(pixel);
101  for (y = 0; y < 32; y++) {
102  AV_WN4PA(dst + 0, p4a);
103  AV_WN4PA(dst + 4, p4b);
104  AV_WN4PA(dst + 8, p4c);
105  AV_WN4PA(dst + 12, p4d);
106  AV_WN4PA(dst + 16, p4e);
107  AV_WN4PA(dst + 20, p4f);
108  AV_WN4PA(dst + 24, p4g);
109  AV_WN4PA(dst + 28, p4h);
110  dst += stride;
111  }
112 }
113 
114 static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
115  const uint8_t *_left, const uint8_t *top)
116 {
117  pixel *dst = (pixel *) _dst;
118  const pixel *left = (const pixel *) _left;
119 
120  stride /= sizeof(pixel);
121  AV_WN4PA(dst + stride * 0, PIXEL_SPLAT_X4(left[3]));
122  AV_WN4PA(dst + stride * 1, PIXEL_SPLAT_X4(left[2]));
123  AV_WN4PA(dst + stride * 2, PIXEL_SPLAT_X4(left[1]));
124  AV_WN4PA(dst + stride * 3, PIXEL_SPLAT_X4(left[0]));
125 }
126 
127 static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
128  const uint8_t *_left, const uint8_t *top)
129 {
130  pixel *dst = (pixel *) _dst;
131  const pixel *left = (const pixel *) _left;
132  int y;
133 
134  stride /= sizeof(pixel);
135  for (y = 0; y < 8; y++) {
136  pixel4 p4 = PIXEL_SPLAT_X4(left[7 - y]);
137 
138  AV_WN4PA(dst + 0, p4);
139  AV_WN4PA(dst + 4, p4);
140  dst += stride;
141  }
142 }
143 
144 static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
145  const uint8_t *_left, const uint8_t *top)
146 {
147  pixel *dst = (pixel *) _dst;
148  const pixel *left = (const pixel *) _left;
149  int y;
150 
151  stride /= sizeof(pixel);
152  for (y = 0; y < 16; y++) {
153  pixel4 p4 = PIXEL_SPLAT_X4(left[15 - y]);
154 
155  AV_WN4PA(dst + 0, p4);
156  AV_WN4PA(dst + 4, p4);
157  AV_WN4PA(dst + 8, p4);
158  AV_WN4PA(dst + 12, p4);
159  dst += stride;
160  }
161 }
162 
163 static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
164  const uint8_t *_left, const uint8_t *top)
165 {
166  pixel *dst = (pixel *) _dst;
167  const pixel *left = (const pixel *) _left;
168  int y;
169 
170  stride /= sizeof(pixel);
171  for (y = 0; y < 32; y++) {
172  pixel4 p4 = PIXEL_SPLAT_X4(left[31 - y]);
173 
174  AV_WN4PA(dst + 0, p4);
175  AV_WN4PA(dst + 4, p4);
176  AV_WN4PA(dst + 8, p4);
177  AV_WN4PA(dst + 12, p4);
178  AV_WN4PA(dst + 16, p4);
179  AV_WN4PA(dst + 20, p4);
180  AV_WN4PA(dst + 24, p4);
181  AV_WN4PA(dst + 28, p4);
182  dst += stride;
183  }
184 }
185 
186 #endif /* BIT_DEPTH != 12 */
187 
188 static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
189  const uint8_t *_left, const uint8_t *_top)
190 {
191  pixel *dst = (pixel *) _dst;
192  const pixel *left = (const pixel *) _left;
193  const pixel *top = (const pixel *) _top;
194  int y, tl = top[-1];
195 
196  stride /= sizeof(pixel);
197  for (y = 0; y < 4; y++) {
198  int l_m_tl = left[3 - y] - tl;
199 
200  dst[0] = av_clip_pixel(top[0] + l_m_tl);
201  dst[1] = av_clip_pixel(top[1] + l_m_tl);
202  dst[2] = av_clip_pixel(top[2] + l_m_tl);
203  dst[3] = av_clip_pixel(top[3] + l_m_tl);
204  dst += stride;
205  }
206 }
207 
208 static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
209  const uint8_t *_left, const uint8_t *_top)
210 {
211  pixel *dst = (pixel *) _dst;
212  const pixel *left = (const pixel *) _left;
213  const pixel *top = (const pixel *) _top;
214  int y, tl = top[-1];
215 
216  stride /= sizeof(pixel);
217  for (y = 0; y < 8; y++) {
218  int l_m_tl = left[7 - y] - tl;
219 
220  dst[0] = av_clip_pixel(top[0] + l_m_tl);
221  dst[1] = av_clip_pixel(top[1] + l_m_tl);
222  dst[2] = av_clip_pixel(top[2] + l_m_tl);
223  dst[3] = av_clip_pixel(top[3] + l_m_tl);
224  dst[4] = av_clip_pixel(top[4] + l_m_tl);
225  dst[5] = av_clip_pixel(top[5] + l_m_tl);
226  dst[6] = av_clip_pixel(top[6] + l_m_tl);
227  dst[7] = av_clip_pixel(top[7] + l_m_tl);
228  dst += stride;
229  }
230 }
231 
232 static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
233  const uint8_t *_left, const uint8_t *_top)
234 {
235  pixel *dst = (pixel *) _dst;
236  const pixel *left = (const pixel *) _left;
237  const pixel *top = (const pixel *) _top;
238  int y, tl = top[-1];
239 
240  stride /= sizeof(pixel);
241  for (y = 0; y < 16; y++) {
242  int l_m_tl = left[15 - y] - tl;
243 
244  dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
245  dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
246  dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
247  dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
248  dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
249  dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
250  dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
251  dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
252  dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
253  dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
254  dst[10] = av_clip_pixel(top[10] + l_m_tl);
255  dst[11] = av_clip_pixel(top[11] + l_m_tl);
256  dst[12] = av_clip_pixel(top[12] + l_m_tl);
257  dst[13] = av_clip_pixel(top[13] + l_m_tl);
258  dst[14] = av_clip_pixel(top[14] + l_m_tl);
259  dst[15] = av_clip_pixel(top[15] + l_m_tl);
260  dst += stride;
261  }
262 }
263 
264 static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
265  const uint8_t *_left, const uint8_t *_top)
266 {
267  pixel *dst = (pixel *) _dst;
268  const pixel *left = (const pixel *) _left;
269  const pixel *top = (const pixel *) _top;
270  int y, tl = top[-1];
271 
272  stride /= sizeof(pixel);
273  for (y = 0; y < 32; y++) {
274  int l_m_tl = left[31 - y] - tl;
275 
276  dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
277  dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
278  dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
279  dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
280  dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
281  dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
282  dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
283  dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
284  dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
285  dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
286  dst[10] = av_clip_pixel(top[10] + l_m_tl);
287  dst[11] = av_clip_pixel(top[11] + l_m_tl);
288  dst[12] = av_clip_pixel(top[12] + l_m_tl);
289  dst[13] = av_clip_pixel(top[13] + l_m_tl);
290  dst[14] = av_clip_pixel(top[14] + l_m_tl);
291  dst[15] = av_clip_pixel(top[15] + l_m_tl);
292  dst[16] = av_clip_pixel(top[16] + l_m_tl);
293  dst[17] = av_clip_pixel(top[17] + l_m_tl);
294  dst[18] = av_clip_pixel(top[18] + l_m_tl);
295  dst[19] = av_clip_pixel(top[19] + l_m_tl);
296  dst[20] = av_clip_pixel(top[20] + l_m_tl);
297  dst[21] = av_clip_pixel(top[21] + l_m_tl);
298  dst[22] = av_clip_pixel(top[22] + l_m_tl);
299  dst[23] = av_clip_pixel(top[23] + l_m_tl);
300  dst[24] = av_clip_pixel(top[24] + l_m_tl);
301  dst[25] = av_clip_pixel(top[25] + l_m_tl);
302  dst[26] = av_clip_pixel(top[26] + l_m_tl);
303  dst[27] = av_clip_pixel(top[27] + l_m_tl);
304  dst[28] = av_clip_pixel(top[28] + l_m_tl);
305  dst[29] = av_clip_pixel(top[29] + l_m_tl);
306  dst[30] = av_clip_pixel(top[30] + l_m_tl);
307  dst[31] = av_clip_pixel(top[31] + l_m_tl);
308  dst += stride;
309  }
310 }
311 
312 #if BIT_DEPTH != 12
313 
314 static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
315  const uint8_t *_left, const uint8_t *_top)
316 {
317  pixel *dst = (pixel *) _dst;
318  const pixel *left = (const pixel *) _left;
319  const pixel *top = (const pixel *) _top;
320  pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] +
321  top[0] + top[1] + top[2] + top[3] + 4) >> 3);
322 
323  stride /= sizeof(pixel);
324  AV_WN4PA(dst + stride * 0, dc);
325  AV_WN4PA(dst + stride * 1, dc);
326  AV_WN4PA(dst + stride * 2, dc);
327  AV_WN4PA(dst + stride * 3, dc);
328 }
329 
330 static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
331  const uint8_t *_left, const uint8_t *_top)
332 {
333  pixel *dst = (pixel *) _dst;
334  const pixel *left = (const pixel *) _left;
335  const pixel *top = (const pixel *) _top;
337  ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
338  left[6] + left[7] + top[0] + top[1] + top[2] + top[3] +
339  top[4] + top[5] + top[6] + top[7] + 8) >> 4);
340  int y;
341 
342  stride /= sizeof(pixel);
343  for (y = 0; y < 8; y++) {
344  AV_WN4PA(dst + 0, dc);
345  AV_WN4PA(dst + 4, dc);
346  dst += stride;
347  }
348 }
349 
350 static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
351  const uint8_t *_left, const uint8_t *_top)
352 {
353  pixel *dst = (pixel *) _dst;
354  const pixel *left = (const pixel *) _left;
355  const pixel *top = (const pixel *) _top;
357  ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
358  left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
359  left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] +
360  top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
361  top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
362  int y;
363 
364  stride /= sizeof(pixel);
365  for (y = 0; y < 16; y++) {
366  AV_WN4PA(dst + 0, dc);
367  AV_WN4PA(dst + 4, dc);
368  AV_WN4PA(dst + 8, dc);
369  AV_WN4PA(dst + 12, dc);
370  dst += stride;
371  }
372 }
373 
374 static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
375  const uint8_t *_left, const uint8_t *_top)
376 {
377  pixel *dst = (pixel *) _dst;
378  const pixel *left = (const pixel *) _left;
379  const pixel *top = (const pixel *) _top;
381  ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
382  left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
383  left[13] + left[14] + left[15] + left[16] + left[17] + left[18] +
384  left[19] + left[20] + left[21] + left[22] + left[23] + left[24] +
385  left[25] + left[26] + left[27] + left[28] + left[29] + left[30] +
386  left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
387  top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
388  top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
389  top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
390  top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
391  int y;
392 
393  stride /= sizeof(pixel);
394  for (y = 0; y < 32; y++) {
395  AV_WN4PA(dst + 0, dc);
396  AV_WN4PA(dst + 4, dc);
397  AV_WN4PA(dst + 8, dc);
398  AV_WN4PA(dst + 12, dc);
399  AV_WN4PA(dst + 16, dc);
400  AV_WN4PA(dst + 20, dc);
401  AV_WN4PA(dst + 2