FFmpeg
vp8dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  * Copyright (C) 2010 Ronald S. Bultje
4  * Copyright (C) 2014 Peter Ross
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * VP8 compatible video decoder
26  */
27 
28 #include "libavutil/common.h"
29 #include "libavutil/intreadwrite.h"
30 
31 #include "mathops.h"
32 #include "vp8dsp.h"
33 
/*
 * Generate, for a codec prefix `name`, two helpers that apply
 * name ## _idct_dc_add_c() to four consecutive 4x4 coefficient blocks:
 *   - name ## _idct_dc_add4uv_c: destinations form a 2x2 grid
 *     (offsets 0, 4, 4 * stride, 4 * stride + 4);
 *   - name ## _idct_dc_add4y_c:  destinations sit side by side
 *     (offsets 0, 4, 8, 12).
 * The blocks are processed in index order 0..3.
 */
#define MK_IDCT_DC_ADD4_C(name)                                               \
static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16],     \
                                      ptrdiff_t stride)                       \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + stride * 4 * (n >> 1) + 4 * (n & 1),     \
                               block[n], stride);                             \
}                                                                             \
                                                                              \
static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16],      \
                                     ptrdiff_t stride)                        \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + 4 * n, block[n], stride);                \
}
52 
53 #if CONFIG_VP7_DECODER
/**
 * Inverse transform of the 16 luma DC coefficients (VP7 variant).
 *
 * A first pass over the four rows of dc[] stores results scaled down by
 * 14 bits in a scratch buffer. A second pass over the scratch columns adds
 * the rounding term 0x20000, scales down by 18 bits and writes each value
 * into coefficient 0 of one sub-block of block[][][].
 *
 * @param block destination; only block[a][b][0] is written
 * @param dc    input coefficients, zeroed on return
 */
static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int16_t mid[16];
    int n;

    for (n = 0; n < 4; n++) {
        const int16_t *in  = dc  + 4 * n;
        int16_t       *out = mid + 4 * n;
        /* kept unsigned as in the original: the sums below then wrap
         * instead of overflowing a signed int */
        unsigned even_sum  = (in[0] + in[2]) * 23170;
        unsigned even_diff = (in[0] - in[2]) * 23170;
        unsigned odd_lo    = in[1] * 12540 - in[3] * 30274;
        unsigned odd_hi    = in[1] * 30274 + in[3] * 12540;

        out[0] = (int)(even_sum  + odd_hi) >> 14;
        out[3] = (int)(even_sum  - odd_hi) >> 14;
        out[1] = (int)(even_diff + odd_lo) >> 14;
        out[2] = (int)(even_diff - odd_lo) >> 14;
    }

    for (n = 0; n < 4; n++) {
        unsigned even_sum  = (mid[n + 0] + mid[n + 8]) * 23170;
        unsigned even_diff = (mid[n + 0] - mid[n + 8]) * 23170;
        unsigned odd_lo    = mid[n + 4] * 12540 - mid[n + 12] * 30274;
        unsigned odd_hi    = mid[n + 4] * 30274 + mid[n + 12] * 12540;

        /* the inputs are consumed: clear four of them per iteration */
        AV_ZERO64(dc + n * 4);
        block[0][n][0] = (int)(even_sum  + odd_hi + 0x20000) >> 18;
        block[3][n][0] = (int)(even_sum  - odd_hi + 0x20000) >> 18;
        block[1][n][0] = (int)(even_diff + odd_lo + 0x20000) >> 18;
        block[2][n][0] = (int)(even_diff - odd_lo + 0x20000) >> 18;
    }
}
83 
/**
 * DC-only shortcut of the VP7 luma DC transform: only dc[0] is read.
 *
 * The single value is scaled twice by 23170 (>> 14, then rounded with
 * 0x20000 and >> 18) and broadcast into coefficient 0 of all 16 sub-blocks.
 *
 * @param block destination; block[a][b][0] is written for all a, b in 0..3
 * @param dc    dc[0] is read and then cleared; other entries are untouched
 */
static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int first_pass = 23170 * dc[0] >> 14;
    const int val        = (23170 * first_pass + 0x20000) >> 18;
    int row, col;

    dc[0] = 0;

    for (row = 0; row < 4; row++)
        for (col = 0; col < 4; col++)
            block[row][col][0] = val;
}
96 
/**
 * 4x4 inverse transform (VP7 variant) whose result is added to dst.
 *
 * Pass 1 works on the rows of block[], clearing each row once it has been
 * read, and stores values scaled down by 14 bits in a scratch buffer.
 * Pass 2 works on the scratch columns, rounds with 0x20000, scales down by
 * 18 bits and adds the result to the pixel, clipped with av_clip_uint8().
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param block  16 coefficients, zeroed on return
 * @param stride distance between vertically adjacent pixels of dst
 */
static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t mid[16];
    int n;

    for (n = 0; n < 4; n++) {
        int16_t *in  = block + 4 * n;
        int16_t *out = mid   + 4 * n;
        unsigned even_sum  = (in[0] + in[2]) * 23170;
        unsigned even_diff = (in[0] - in[2]) * 23170;
        unsigned odd_lo    = in[1] * 12540 - in[3] * 30274;
        unsigned odd_hi    = in[1] * 30274 + in[3] * 12540;

        AV_ZERO64(in);
        out[0] = (int)(even_sum  + odd_hi) >> 14;
        out[3] = (int)(even_sum  - odd_hi) >> 14;
        out[1] = (int)(even_diff + odd_lo) >> 14;
        out[2] = (int)(even_diff - odd_lo) >> 14;
    }

    for (n = 0; n < 4; n++) {
        uint8_t *col = dst + n;
        unsigned even_sum  = (mid[n + 0] + mid[n + 8]) * 23170;
        unsigned even_diff = (mid[n + 0] - mid[n + 8]) * 23170;
        unsigned odd_lo    = mid[n + 4] * 12540 - mid[n + 12] * 30274;
        unsigned odd_hi    = mid[n + 4] * 30274 + mid[n + 12] * 12540;

        col[0 * stride] = av_clip_uint8(col[0 * stride] +
                                        ((int)(even_sum + odd_hi + 0x20000) >> 18));
        col[3 * stride] = av_clip_uint8(col[3 * stride] +
                                        ((int)(even_sum - odd_hi + 0x20000) >> 18));
        col[1 * stride] = av_clip_uint8(col[1 * stride] +
                                        ((int)(even_diff + odd_lo + 0x20000) >> 18));
        col[2 * stride] = av_clip_uint8(col[2 * stride] +
                                        ((int)(even_diff - odd_lo + 0x20000) >> 18));
    }
}
130 
131 static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
132 {
133  int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
134  block[0] = 0;
135 
136  for (i = 0; i < 4; i++) {
137  dst[0] = av_clip_uint8(dst[0] + dc);
138  dst[1] = av_clip_uint8(dst[1] + dc);
139  dst[2] = av_clip_uint8(dst[2] + dc);
140  dst[3] = av_clip_uint8(dst[3] + dc);
141  dst += stride;
142  }
143 }
144 
146 #endif /* CONFIG_VP7_DECODER */
147 
148 // TODO: Maybe add dequant
149 #if CONFIG_VP8_DECODER
/**
 * Inverse transform of the 16 luma DC coefficients (VP8 variant).
 *
 * Pass 1 runs add/subtract butterflies down each column of dc[], in place.
 * Pass 2 runs the same butterflies along each row, adds the rounding term 3,
 * shifts right by 3 and stores the result in coefficient 0 of a sub-block.
 *
 * @param block destination; only block[a][b][0] is written
 * @param dc    input coefficients, zeroed on return
 */
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int n;

    for (n = 0; n < 4; n++) {
        int16_t *col = dc + n;
        const int outer_sum  = col[0] + col[12];
        const int inner_sum  = col[4] + col[8];
        const int inner_diff = col[4] - col[8];
        const int outer_diff = col[0] - col[12];

        col[0]  = outer_sum  + inner_sum;
        col[4]  = outer_diff + inner_diff;
        col[8]  = outer_sum  - inner_sum;
        col[12] = outer_diff - inner_diff;
    }

    for (n = 0; n < 4; n++) {
        int16_t *row = dc + 4 * n;
        const int outer_sum  = row[0] + row[3] + 3; /* rounding */
        const int inner_sum  = row[1] + row[2];
        const int inner_diff = row[1] - row[2];
        const int outer_diff = row[0] - row[3] + 3; /* rounding */

        AV_ZERO64(row);

        block[n][0][0] = (outer_sum  + inner_sum)  >> 3;
        block[n][1][0] = (outer_diff + inner_diff) >> 3;
        block[n][2][0] = (outer_sum  - inner_sum)  >> 3;
        block[n][3][0] = (outer_diff - inner_diff) >> 3;
    }
}
179 
/**
 * DC-only shortcut of the VP8 luma DC transform: only dc[0] is read.
 *
 * (dc[0] + 3) >> 3 is broadcast into coefficient 0 of all 16 sub-blocks.
 *
 * @param block destination; block[a][b][0] is written for all a, b in 0..3
 * @param dc    dc[0] is read and then cleared; other entries are untouched
 */
static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int val = (dc[0] + 3) >> 3;
    int row, col;

    dc[0] = 0;

    for (row = 0; row < 4; row++)
        for (col = 0; col < 4; col++)
            block[row][col][0] = val;
}
192 
/* Fixed-point multipliers: a * 20091 / 65536 + a, and a * 35468 / 65536. */
#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
#define MUL_35468(a) (((a) * 35468) >> 16)

/**
 * 4x4 inverse transform (VP8 variant) whose result is added to dst.
 *
 * Pass 1 reads block[] column by column, clears what it read and stores the
 * result transposed in a scratch buffer. Pass 2 reads the scratch buffer
 * column by column, rounds with "+ 4", shifts right by 3 and adds the value
 * to one row of pixels, clipped with av_clip_uint8().
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param block  16 coefficients, zeroed on return
 * @param stride distance between vertically adjacent pixels of dst
 */
static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t mid[16];
    int n;

    for (n = 0; n < 4; n++) {
        int16_t *col = block + n;
        const int v0 = col[0], v1 = col[4], v2 = col[8], v3 = col[12];
        const int even_sum  = v0 + v2;
        const int even_diff = v0 - v2;
        const int odd_lo    = MUL_35468(v1) - MUL_20091(v3);
        const int odd_hi    = MUL_20091(v1) + MUL_35468(v3);

        col[0]  = 0;
        col[4]  = 0;
        col[8]  = 0;
        col[12] = 0;

        mid[n * 4 + 0] = even_sum  + odd_hi;
        mid[n * 4 + 1] = even_diff + odd_lo;
        mid[n * 4 + 2] = even_diff - odd_lo;
        mid[n * 4 + 3] = even_sum  - odd_hi;
    }

    for (n = 0; n < 4; n++, dst += stride) {
        const int v0 = mid[n], v1 = mid[4 + n], v2 = mid[8 + n], v3 = mid[12 + n];
        const int even_sum  = v0 + v2;
        const int even_diff = v0 - v2;
        const int odd_lo    = MUL_35468(v1) - MUL_20091(v3);
        const int odd_hi    = MUL_20091(v1) + MUL_35468(v3);

        dst[0] = av_clip_uint8(dst[0] + ((even_sum  + odd_hi + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((even_diff + odd_lo + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((even_diff - odd_lo + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((even_sum  - odd_hi + 4) >> 3));
    }
}
230 
231 static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
232 {
233  int i, dc = (block[0] + 4) >> 3;
234  block[0] = 0;
235 
236  for (i = 0; i < 4; i++) {
237  dst[0] = av_clip_uint8(dst[0] + dc);
238  dst[1] = av_clip_uint8(dst[1] + dc);
239  dst[2] = av_clip_uint8(dst[2] + dc);
240  dst[3] = av_clip_uint8(dst[3] + dc);
241  dst += stride;
242  }
243 }
244 
246 #endif /* CONFIG_VP8_DECODER */
247 
/*
 * Declares the eight samples around position p as locals of the enclosing
 * function: p3..p0 are the four samples before p and q0..q3 the four samples
 * starting at p, each `stride` apart. It relies on `p` and `stride` being in
 * scope, which lets the filter helpers take only those two arguments.
 * Every local is tagged av_unused because most users read only a few of them.
 */
#define LOAD_PIXELS                                                           \
    int av_unused p3 = *(p - 4 * stride);                                     \
    int av_unused p2 = *(p - 3 * stride);                                     \
    int av_unused p1 = *(p - 2 * stride);                                     \
    int av_unused p0 = *(p - 1 * stride);                                     \
    int av_unused q0 = *(p + 0 * stride);                                     \
    int av_unused q1 = *(p + 1 * stride);                                     \
    int av_unused q2 = *(p + 2 * stride);                                     \
    int av_unused q3 = *(p + 3 * stride);
258 
259 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
260 
261 static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
262  int is4tap, int is_vp7)
263 {
265  int a, f1, f2;
266  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
267 
268  a = 3 * (q0 - p0);
269 
270  if (is4tap)
271  a += clip_int8(p1 - q1);
272 
273  a = clip_int8(a);
274 
275  // We deviate from the spec here with c(a+3) >> 3
276  // since that's what libvpx does.
277  f1 = FFMIN(a + 4, 127) >> 3;
278 
279  if (is_vp7)
280  f2 = f1 - ((a & 7) == 4);
281  else
282  f2 = FFMIN(a + 3, 127) >> 3;
283 
284  // Despite what the spec says, we do need to clamp here to
285  // be bitexact with libvpx.
286  p[-1 * stride] = cm[p0 + f2];
287  p[ 0 * stride] = cm[q0 - f1];
288 
289  // only used for _inner on blocks without high edge variance
290  if (!is4tap) {
291  a = (f1 + 1) >> 1;
292  p[-2 * stride] = cm[p1 + a];
293  p[ 1 * stride] = cm[q1 - a];
294  }
295 }
296 
298  int is4tap)
299 {
300  filter_common(p, stride, is4tap, IS_VP7);
301 }
302 
304  int is4tap)
305 {
306  filter_common(p, stride, is4tap, IS_VP8);
307 }
308 
310  int flim)
311 {
313  return FFABS(p0 - q0) <= flim;
314 }
315 
317  int flim)
318 {
320  return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
321 }
322 
323 /**
324  * E - limit at the macroblock edge
325  * I - limit for interior difference
326  */
327 #define NORMAL_LIMIT(vpn) \
328 static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \
329  ptrdiff_t stride, \
330  int E, int I) \
331 { \
332  LOAD_PIXELS \
333  return vp ## vpn ## _simple_limit(p, stride, E) && \
334  FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \
335  FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \
336  FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \
337 }
338 
339 NORMAL_LIMIT(7)
340 NORMAL_LIMIT(8)
341 
342 // high edge variance
343 static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
344 {
346  return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
347 }
348 
349 static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
350 {
351  int a0, a1, a2, w;
352  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
353 
355 
356  w = clip_int8(p1 - q1);
357  w = clip_int8(w + 3 * (q0 - p0));
358 
359  a0 = (27 * w + 63) >> 7;
360  a1 = (18 * w + 63) >> 7;
361  a2 = (9 * w + 63) >> 7;
362 
363  p[-3 * stride] = cm[p2 + a2];
364  p[-2 * stride] = cm[p1 + a1];
365  p[-1 * stride] = cm[p0 + a0];
366  p[ 0 * stride] = cm[q0 - a0];
367  p[ 1 * stride] = cm[q1 - a1];
368  p[ 2 * stride] = cm[q2 - a2];
369 }
370 
/*
 * Generate the normal loop filters for `size` positions along one edge.
 *
 *   stridea - step from one filtered position to the next along the edge
 *   strideb - step between samples across the edge
 *
 * <vpn>_<dir>_loop_filter<size>_c:       positions that pass the normal limit
 *     get the 4-tap common filter when hev() is set, filter_mbedge() otherwise.
 * <vpn>_<dir>_loop_filter<size>_inner_c: positions that pass the normal limit
 *     get the common filter, 4-tap when hev() is set and the other variant
 *     otherwise.
 *
 * The generated functions must keep a parameter named `stride`, because the
 * stridea/strideb arguments may expand to it.
 */
#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline)           \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,              \
                                                   ptrdiff_t stride,          \
                                                   int flim_E, int flim_I,    \
                                                   int hev_thresh)            \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < size; n++) {                                              \
        uint8_t *edge = dst + n * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            filter_mbedge(edge, strideb);                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,        \
                                                         ptrdiff_t stride,    \
                                                         int flim_E,          \
                                                         int flim_I,          \
                                                         int hev_thresh)      \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < size; n++) {                                              \
        uint8_t *edge = dst + n * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            vpn ## _filter_common(edge, strideb, 0);                          \
    }                                                                         \
}
407 
/*
 * Generate the 8-position loop filters (always inlined) and the two-plane
 * wrappers around them: each wrapper filters dstU first, then dstV, with
 * identical stride and thresholds.
 */
#define UV_LOOP_FILTER(vpn, dir, stridea, strideb)                            \
LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline)                  \
static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU,               \
                                                 uint8_t *dstV,               \
                                                 ptrdiff_t stride,            \
                                                 int flim_E, int flim_I,      \
                                                 int hev_thresh)              \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, flim_E, flim_I,          \
                                       hev_thresh);                           \
    vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, flim_E, flim_I,          \
                                       hev_thresh);                           \
}                                                                             \
                                                                              \
static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,         \
                                                       uint8_t *dstV,         \
                                                       ptrdiff_t stride,      \
                                                       int flim_E,            \
                                                       int flim_I,            \
                                                       int hev_thresh)        \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, flim_E, flim_I,    \
                                             hev_thresh);                     \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, flim_E, flim_I,    \
                                             hev_thresh);                     \
}
430 
/*
 * Generate the simple loop filters over 16 positions. Each position that
 * passes <vpn>_simple_limit() gets the 4-tap common filter.
 *   _v_: positions are consecutive bytes, samples are `stride` apart;
 *   _h_: positions are `stride` apart, samples are consecutive bytes.
 */
#define LOOP_FILTER_SIMPLE(vpn)                                               \
static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 16; n++) {                                                \
        uint8_t *edge = dst + n;                                              \
        if (vpn ## _simple_limit(edge, stride, flim))                         \
            vpn ## _filter_common(edge, stride, 1);                           \
    }                                                                         \
}                                                                             \
                                                                              \
static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 16; n++) {                                                \
        uint8_t *edge = dst + n * stride;                                     \
        if (vpn ## _simple_limit(edge, 1, flim))                              \
            vpn ## _filter_common(edge, 1, 1);                                \
    }                                                                         \
}
449 
/*
 * Instantiate every loop filter for one codec prefix: the 16-position
 * vertical and horizontal filters, the 8-position two-plane filters, and the
 * simple filters.
 * The last line deliberately has no continuation backslash, so that whatever
 * follows the macro can never be spliced into its body.
 */
#define LOOP_FILTERS(vpn)                                                     \
    LOOP_FILTER(vpn, v, 16, 1, stride, )                                      \
    LOOP_FILTER(vpn, h, 16, stride, 1, )                                      \
    UV_LOOP_FILTER(vpn, v, 1, stride)                                         \
    UV_LOOP_FILTER(vpn, h, stride, 1)                                         \
    LOOP_FILTER_SIMPLE(vpn)
456 
/*
 * Sub-pixel interpolation tap magnitudes, one row per fractional position;
 * the EPEL functions index it with mx - 1 or my - 1.
 * FILTER_6TAP / FILTER_4TAP subtract the products of taps 1 and 4 and add
 * the others, so each row effectively sums to 128.
 */
static const uint8_t subpel_filters[7][6] = {
    /* tap:  0   1    2    3   4  5 */
    {        0,  6, 123,  12,  1, 0 },
    {        2, 11, 108,  36,  8, 1 },
    {        0,  9,  93,  50,  6, 0 },
    {        3, 16,  77,  77, 16, 3 },
    {        0,  6,  50,  93,  9, 0 },
    {        1,  8,  36, 108, 11, 2 },
    {        0,  1,  12, 123,  6, 0 },
};
466 
/*
 * Generate put_vp8_pixels<WIDTH>_c(): copy h rows of WIDTH bytes from src to
 * dst, advancing each pointer by its own stride after every row.
 * The x and y arguments are accepted but not used.
 */
#define PUT_PIXELS(WIDTH)                                                     \
static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
                                          uint8_t *src, ptrdiff_t srcstride,  \
                                          int h, int x, int y)                \
{                                                                             \
    int rows_left;                                                            \
    for (rows_left = h; rows_left > 0; rows_left--) {                         \
        memcpy(dst, src, WIDTH);                                              \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
480 
/*
 * Interpolation kernels. Both expect `x` (current sample index) and `cm`
 * (crop table used to clamp the result) in the caller's scope. The products
 * of taps 1 and 4 are subtracted, the others added; 64 rounds the final
 * ">> 7". FILTER_4TAP skips the two outermost taps (0 and 5).
 */
#define FILTER_6TAP(src, F, stride)                                           \
    cm[(F[0] * src[x - 2 * stride] - F[1] * src[x - 1 * stride] +             \
        F[2] * src[x + 0 * stride] + F[3] * src[x + 1 * stride] -             \
        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride)                                           \
    cm[(F[2] * src[x + 0 * stride] + F[3] * src[x + 1 * stride] -             \
        F[1] * src[x - 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
489 
/*
 * Generate put_vp8_epel<SIZE>_h<TAPS>_c(): horizontal-only interpolation of
 * an h-row, SIZE-wide block using filter row subpel_filters[mx - 1].
 * my is not used.
 */
#define VP8_EPEL_H(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *taps = subpel_filters[mx - 1];                             \
    const uint8_t *cm   = ff_crop_tab + MAX_NEG_CROP;                         \
    int x, row;                                                               \
    for (row = 0; row < h; row++, dst += dststride, src += srcstride)         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, taps, 1);                    \
}
507 
/*
 * Generate put_vp8_epel<SIZE>_v<TAPS>_c(): vertical-only interpolation of an
 * h-row, SIZE-wide block using filter row subpel_filters[my - 1]; neighbouring
 * samples are srcstride apart. mx is not used.
 */
#define VP8_EPEL_V(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *taps = subpel_filters[my - 1];                             \
    const uint8_t *cm   = ff_crop_tab + MAX_NEG_CROP;                         \
    int x, row;                                                               \
    for (row = 0; row < h; row++, dst += dststride, src += srcstride)         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, taps, srcstride);            \
}
525 
/*
 * Generate put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c(): two-pass interpolation.
 *
 * Pass 1 filters h + VTAPS - 1 source rows horizontally (filter row mx - 1)
 * into a SIZE-wide scratch buffer, starting `lead` rows above the block,
 * where lead is 1 for a 4-tap vertical filter and 2 otherwise.
 * Pass 2 filters the scratch buffer vertically (filter row my - 1) into dst.
 * The scratch buffer holds 2 * SIZE + VTAPS - 1 rows.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
static void                                                                   \
put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
                                                        ptrdiff_t dststride,  \
                                                        uint8_t *src,         \
                                                        ptrdiff_t srcstride,  \
                                                        int h, int mx,        \
                                                        int my)               \
{                                                                             \
    const uint8_t *hfilter = subpel_filters[mx - 1];                          \
    const uint8_t *vfilter = subpel_filters[my - 1];                          \
    const uint8_t *cm      = ff_crop_tab + MAX_NEG_CROP;                      \
    const int lead         = 2 - (VTAPS == 4);                                \
    uint8_t scratch[(2 * SIZE + VTAPS - 1) * SIZE];                           \
    uint8_t *line = scratch;                                                  \
    int x, row;                                                               \
                                                                              \
    src -= lead * srcstride;                                                  \
    for (row = 0; row < h + VTAPS - 1; row++, line += SIZE, src += srcstride) \
        for (x = 0; x < SIZE; x++)                                            \
            line[x] = FILTER_ ## HTAPS ## TAP(src, hfilter, 1);               \
                                                                              \
    line = scratch + lead * SIZE;                                             \
    for (row = 0; row < h; row++, line += SIZE, dst += dststride)             \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## VTAPS ## TAP(line, vfilter, SIZE);            \
}
558 
559 VP8_EPEL_H(16, 4)
560 VP8_EPEL_H(8, 4)
561 VP8_EPEL_H(4, 4)
562 VP8_EPEL_H(16, 6)
563 VP8_EPEL_H(8, 6)
564 VP8_EPEL_H(4, 6)
565 VP8_EPEL_V(16, 4)
566 VP8_EPEL_V(8, 4)
567 VP8_EPEL_V(4, 4)
568 VP8_EPEL_V(16, 6)
569 VP8_EPEL_V(8, 6)
570 VP8_EPEL_V(4, 6)
571 
572 VP8_EPEL_HV(16, 4, 4)
573 VP8_EPEL_HV(8, 4, 4)
574 VP8_EPEL_HV(4, 4, 4)
575 VP8_EPEL_HV(16, 4, 6)
576 VP8_EPEL_HV(8, 4, 6)
577 VP8_EPEL_HV(4, 4, 6)
578 VP8_EPEL_HV(16, 6, 4)
579 VP8_EPEL_HV(8, 6, 4)
580 VP8_EPEL_HV(4, 6, 4)
581 VP8_EPEL_HV(16, 6, 6)
582 VP8_EPEL_HV(8, 6, 6)
583 VP8_EPEL_HV(4, 6, 6)
584 
/*
 * Generate the bilinear interpolators for a SIZE-wide block of h rows.
 * Every output is (w0 * near + w1 * far + 4) >> 3 with w0 + w1 = 8:
 *   _h_c:  far is the next sample to the right, w1 = mx (my unused);
 *   _v_c:  far is the sample one source row down, w1 = my (mx unused);
 *   _hv_c: horizontal pass over h + 1 rows into a scratch buffer, followed
 *          by a vertical pass over that buffer.
 * The scratch buffer holds 2 * SIZE + 1 rows of SIZE bytes.
 */
#define VP8_BILINEAR(SIZE)                                                    \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    const int w_near = 8 - mx, w_far = mx;                                    \
    int row, col;                                                             \
    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (w_near * src[col] + w_far * src[col + 1] + 4) >> 3;   \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    const int w_near = 8 - my, w_far = my;                                    \
    int row, col;                                                             \
    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (w_near * src[col] +                                   \
                        w_far * src[col + sstride] + 4) >> 3;                 \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
                                              ptrdiff_t dstride,              \
                                              uint8_t *src,                   \
                                              ptrdiff_t sstride,              \
                                              int h, int mx, int my)          \
{                                                                             \
    const int wh_near = 8 - mx, wh_far = mx;                                  \
    const int wv_near = 8 - my, wv_far = my;                                  \
    uint8_t scratch[(2 * SIZE + 1) * SIZE];                                   \
    uint8_t *line = scratch;                                                  \
    int row, col;                                                             \
                                                                              \
    for (row = 0; row < h + 1; row++, line += SIZE, src += sstride)           \
        for (col = 0; col < SIZE; col++)                                      \
            line[col] = (wh_near * src[col] +                                 \
                         wh_far * src[col + 1] + 4) >> 3;                     \
                                                                              \
    line = scratch;                                                           \
    for (row = 0; row < h; row++, line += SIZE, dst += dstride)               \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (wv_near * line[col] +                                 \
                        wv_far * line[col + SIZE] + 4) >> 3;                  \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
643 
/*
 * Fill dsp->put_vp8_epel_pixels_tab[IDX] for block width SIZE.
 * The table is indexed [IDX][vertical][horizontal], where 0 selects no
 * filtering in that direction, 1 the 4-tap and 2 the 6-tap filter.
 * Expects a VP8DSPContext pointer named `dsp` in scope. Wrapped in
 * do { } while (0) so the expansion is a single statement and stays correct
 * under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VP78_MC_FUNC(IDX, SIZE)                                                \
    do {                                                                       \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
654 
/*
 * Fill dsp->put_vp8_bilinear_pixels_tab[IDX] for block width SIZE.
 * The table is indexed [IDX][vertical][horizontal]; slot [0][0] is the plain
 * copy, and indices 1 and 2 in either direction map to the same bilinear
 * routine.
 * Expects a VP8DSPContext pointer named `dsp` in scope. Wrapped in
 * do { } while (0) so the expansion is a single statement and stays correct
 * under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE)                                       \
    do {                                                                       \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
665 
667 {
668  VP78_MC_FUNC(0, 16);
669  VP78_MC_FUNC(1, 8);
670  VP78_MC_FUNC(2, 4);
671 
672  VP78_BILINEAR_MC_FUNC(0, 16);
673  VP78_BILINEAR_MC_FUNC(1, 8);
674  VP78_BILINEAR_MC_FUNC(2, 4);
675 
676  if (ARCH_AARCH64)
678  if (ARCH_ARM)
679  ff_vp78dsp_init_arm(dsp);
680  if (ARCH_PPC)
681  ff_vp78dsp_init_ppc(dsp);
682  if (ARCH_X86)
683  ff_vp78dsp_init_x86(dsp);
684 }
685 
686 #if CONFIG_VP7_DECODER
687 LOOP_FILTERS(vp7)
688 
690 {
691  dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c;
692  dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c;
693  dsp->vp8_idct_add = vp7_idct_add_c;
694  dsp->vp8_idct_dc_add = vp7_idct_dc_add_c;
695  dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c;
696  dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c;
697 
698  dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c;
699  dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c;
700  dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c;
701  dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c;
702 
703  dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c;
704  dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c;
705  dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c;
706  dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c;
707 
708  dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c;
709  dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c;
710 }
711 #endif /* CONFIG_VP7_DECODER */
712 
713 #if CONFIG_VP8_DECODER
714 LOOP_FILTERS(vp8)
715 
717 {
718  dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
719  dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
720  dsp->vp8_idct_add = vp8_idct_add_c;
721  dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
722  dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c;
723  dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
724 
725  dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
726  dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
727  dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
728  dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
729 
730  dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
731  dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
732  dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
733  dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
734 
735  dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
736  dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
737 
738  if (ARCH_AARCH64)
740  if (ARCH_ARM)
741  ff_vp8dsp_init_arm(dsp);
742  if (ARCH_X86)
743  ff_vp8dsp_init_x86(dsp);
744  if (ARCH_MIPS)
745  ff_vp8dsp_init_mips(dsp);
746 }
747 #endif /* CONFIG_VP8_DECODER */
const char const char void * val
Definition: avisynth_c.h:863
void(* vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:41
#define MK_IDCT_DC_ADD4_C(name)
Definition: vp8dsp.c:34
void ff_vp7dsp_init(VP8DSPContext *c)
static const uint8_t subpel_filters[7][6]
Definition: vp8dsp.c:457
void(* vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:42
#define a0
Definition: regdef.h:46
av_cold void ff_vp78dsp_init_ppc(VP8DSPContext *c)
#define MAX_NEG_CROP
Definition: mathops.h:31
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
Definition: vp8dsp.c:303
#define a1
Definition: regdef.h:47
void(* vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:62
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
Definition: vp8dsp.c:666
av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp)
static const uint8_t q1[256]
Definition: twofish.c:96
av_cold void ff_vp8dsp_init_aarch64(VP8DSPContext *dsp)
av_cold void ff_vp78dsp_init_arm(VP8DSPContext *dsp)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
Definition: vp8dsp.c:349
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
#define av_cold
Definition: attributes.h:82
#define clip_int8(n)
Definition: vp8dsp.c:259
void(* vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:39
#define VP8_EPEL_H(SIZE, TAPS)
Definition: vp8dsp.c:490
void(* vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
Definition: vp8dsp.h:44
VP8 compatible video decoder.
#define t0
Definition: regdef.h:28
av_cold void ff_vp8dsp_init_mips(VP8DSPContext *dsp)
static const uint64_t c1
Definition: murmur3.c:49
void(* vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:52
#define cm
Definition: dvbsubdec.c:37
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
Definition: vp8dsp.c:316
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE)
Definition: vp8dsp.c:655
#define NORMAL_LIMIT(vpn)
E - limit at the macroblock edge I - limit for interior difference.
Definition: vp8dsp.c:327
void(* vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
Definition: vp8dsp.h:40
#define t1
Definition: regdef.h:29
av_cold void ff_vp78dsp_init_aarch64(VP8DSPContext *dsp)
#define t3
Definition: regdef.h:31
#define VP8_BILINEAR(SIZE)
Definition: vp8dsp.c:585
static const uint8_t q0[256]
Definition: twofish.c:77
#define FFMIN(a, b)
Definition: common.h:96
uint8_t w
Definition: llviddspenc.c:38
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)
Definition: vp8dsp.c:526
void(* vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:65
static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
Definition: vp8dsp.c:309
#define a2
Definition: regdef.h:48
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define IS_VP8
Definition: vp8dsp.h:106
void(* vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:48
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
void ff_vp78dsp_init_x86(VP8DSPContext *c)
Definition: vp8dsp_init.c:319
void(* vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:54
static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap, int is_vp7)
Definition: vp8dsp.c:261
#define VP8_EPEL_V(SIZE, TAPS)
Definition: vp8dsp.c:508
void ff_vp8dsp_init_x86(VP8DSPContext *c)
Definition: vp8dsp_init.c:369
#define LOAD_PIXELS
Definition: vp8dsp.c:249
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
Definition: vp8dsp.c:343
void(* vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:60
void(* vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16])
Definition: vp8dsp.h:38
void(* vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:69
void(* vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)
Definition: vp8dsp.h:70
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
int
#define AV_ZERO64(d)
Definition: intreadwrite.h:633
common internal and external API header
void(* vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:58
#define ff_crop_tab
#define IS_VP7
Definition: vp8dsp.h:105
#define av_always_inline
Definition: attributes.h:39
#define stride
void(* vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Definition: vp8dsp.h:50
#define LOOP_FILTERS(vpn)
Definition: vp8dsp.c:450
void ff_vp8dsp_init(VP8DSPContext *c)
#define VP78_MC_FUNC(IDX, SIZE)
Definition: vp8dsp.c:644
#define PUT_PIXELS(WIDTH)
Definition: vp8dsp.c:467
#define t2
Definition: regdef.h:30
static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
Definition: vp8dsp.c:297
static uint8_t tmp[11]
Definition: aes_ctr.c:26