vp9recon.c
/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"

#include "avcodec.h"
#include "internal.h"
#include "videodsp.h"
#include "vp9data.h"
#include "vp9dec.h"
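
/*
 * Set up the top (*a) and left (l) edge pixel arrays used to intra-predict
 * one transform block, and return the intra prediction mode to use.  Modes
 * that require unavailable neighbours (frame or tile border) are remapped
 * through mode_conv[], and missing edge samples are synthesized from
 * replicated or constant values so the DSP prediction functions never read
 * outside the decoded picture.
 */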
static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a,
                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
                                             uint8_t *l, int col, int x, int w,
                                             int row, int y, enum TxfmMode tx,
                                             int p, int ss_h, int ss_v, int bytesperpixel)
{
    VP9Context *s = td->s;
    int have_top = row > 0 || y > 0;
    int have_left = col > td->tile_col_start || x > 0;
    int have_right = x < w - 1;
    int bpp = s->s.h.bpp;
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED            },
                                   { DC_127_PRED,          VERT_PRED            } },
        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED          },
                                   { HOR_PRED,             HOR_PRED             } },
        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED          },
                                   { LEFT_DC_PRED,         DC_PRED              } },
        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  },
                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  } },
        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      },
                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      } },
        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED        },
                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED        } },
        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED       },
                                   { DC_127_PRED,          VERT_LEFT_PRED       } },
        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED          },
                                   { HOR_UP_PRED,          HOR_UP_PRED          } },
        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED            },
                                   { HOR_PRED,             TM_VP8_PRED          } },
    };
    static const struct {
        uint8_t needs_left:1;
        uint8_t needs_top:1;
        uint8_t needs_topleft:1;
        uint8_t needs_topright:1;
        uint8_t invert_left:1;
    } edges[N_INTRA_PRED_MODES] = {
        [VERT_PRED]            = { .needs_top  = 1 },
        [HOR_PRED]             = { .needs_left = 1 },
        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
        [HOR_UP_PRED]          = { .needs_left = 1, .invert_left = 1 },
        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [LEFT_DC_PRED]         = { .needs_left = 1 },
        [TOP_DC_PRED]          = { .needs_top  = 1 },
        [DC_128_PRED]          = { 0 },
        [DC_127_PRED]          = { 0 },
        [DC_129_PRED]          = { 0 }
    };

    av_assert2(mode >= 0 && mode < 10);
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        uint8_t *top, *topleft;
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
            n_px_need_tr = 4;

        // if top of sb64-row, use s->intra_pred_data[] instead of
        // dst[-stride] for intra prediction (it contains pre- instead of
        // post-loopfilter data)
        if (have_top) {
            top = !(row & 7) && !y ?
                s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
            if (have_left)
                topleft = !(row & 7) && !y ?
                    s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                    &dst_inner[-stride_inner];
        }

        if (have_top &&
            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {
            *a = top;
        } else {
            if (have_top) {
                if (n_px_need <= n_px_have) {
                    memcpy(*a, top, n_px_need * bytesperpixel);
                } else {
// write 'num' copies of the pixel (v)[(i2)] into (c) starting at index (i1),
// for either 8-bit or 16-bit pixels
#define memset_bpp(c, i1, v, i2, num) do { \
    if (bytesperpixel == 1) { \
        memset(&(c)[(i1)], (v)[(i2)], (num)); \
    } else { \
        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[((i1) + n) * 2], val); \
        } \
    } \
} while (0)
                    memcpy(*a, top, n_px_have * bytesperpixel);
                    memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
                }
            } else {
// fill 'num' pixels of (c) with the constant (val), for 8- or 16-bit pixels
#define memset_val(c, val, num) do { \
    if (bytesperpixel == 1) { \
        memset((c), (val), (num)); \
    } else { \
        int n; \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[n * 2], (val)); \
        } \
    } \
} while (0)
                memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
            }
            if (edges[mode].needs_topleft) {
                if (have_left && have_top) {
// copy a single pixel (v)[(i2)] to (c)[(i1)], for 8- or 16-bit pixels
#define assign_bpp(c, i1, v, i2) do { \
    if (bytesperpixel == 1) { \
        (c)[(i1)] = (v)[(i2)]; \
    } else { \
        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
    } \
} while (0)
                    assign_bpp(*a, -1, topleft, -1);
                } else {
// store the constant (v) at (c)[(i)], for 8- or 16-bit pixels
#define assign_val(c, i, v) do { \
    if (bytesperpixel == 1) { \
        (c)[(i)] = (v); \
    } else { \
        AV_WN16A(&(c)[(i) * 2], (v)); \
    } \
} while (0)
                    assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
                }
            }
            if (tx == TX_4X4 && edges[mode].needs_topright) {
                if (have_top && have_right &&
                    n_px_need + n_px_need_tr <= n_px_have) {
                    memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
                } else {
                    memset_bpp(*a, 4, *a, 3, 4);
                }
            }
        }
    }
    if (edges[mode].needs_left) {
        if (have_left) {
            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (edges[mode].invert_left) {
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, i, &dst[i * stride], -1);
                } else {
                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, i, &dst[i * stride], -1);
                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
                }
            } else {
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                } else {
                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
                }
            }
        } else {
            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
        }
    }

    return mode;
}
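
/*
 * Reconstruct an intra-coded block: for each transform block of the luma
 * plane and then of both chroma planes, prepare the prediction edges with
 * check_intra_mode(), run the intra predictor, and add the inverse
 * transform of the residual whenever the transform block has a non-zero
 * EOB.
 */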
static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
                                         ptrdiff_t uv_off, int bytesperpixel)
{
    VP9Context *s = td->s;
    VP9Block *b = td->b;
    int row = td->row, col = td->col;
    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
    int uvstep1d = 1 << b->uvtx, p;
    uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
    LOCAL_ALIGNED_32(uint8_t, l, [64]);

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
             ptr_r += 4 * step1d * bytesperpixel, n += step) {
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            uint8_t *a = &a_buf[32];
            enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];

            mode = check_intra_mode(td, mode, &a, ptr_r,
                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
                                    ptr, td->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
            s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
                                           td->block + 16 * n * bytesperpixel, eob);
        }
        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
        dst += 4 * step1d * td->y_stride;
    }

    // U/V
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    end_y >>= s->ss_v;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = td->dst[1 + p];
        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
                int mode = b->uvmode;
                uint8_t *a = &a_buf[32];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];

                mode = check_intra_mode(td, mode, &a, ptr_r,
                                        s->s.frames[CUR_FRAME].tf.f->linesize[1],
                                        ptr, td->uv_stride, l, col, x, w4, row, y,
                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
                s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
                                                    td->uvblock[p] + 16 * n * bytesperpixel, eob);
            }
            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
            dst += 4 * uvstep1d * td->uv_stride;
        }
    }
}

void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(td, y_off, uv_off, 1);
}

void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(td, y_off, uv_off, 2);
}
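
/*
 * Luma motion compensation for the common case where the reference frame
 * has the same dimensions as the current frame: apply the 1/8-pel motion
 * vector, wait (ff_thread_await_progress) until the referenced rows of the
 * reference frame have been decoded, and fall back to emulated_edge_mc()
 * when the filter taps would read outside the picture.
 */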
static av_always_inline void mc_luma_unscaled(VP9TileData *td, vp9_mc_func (*mc)[2],
                                              uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *ref, ptrdiff_t ref_stride,
                                              ThreadFrame *ref_frame,
                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                              int bw, int bh, int w, int h, int bytesperpixel)
{
    VP9Context *s = td->s;
    int mx = mv->x, my = mv->y, th;

    y += my >> 3;
    x += mx >> 3;
    ref += y * ref_stride + x * bytesperpixel;
    mx &= 7;
    my &= 7;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> 6;
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
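    // Example with illustrative values: a block at y == 60 with bh == 16 and
    // a non-zero vertical MV fraction gives th = (60 + 16 + 4 + 7) >> 6 = 1,
    // so the ff_thread_await_progress() call above returns only once the
    // reference frame's reported decode progress has reached 1.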
    // The arm/aarch64 _hv filters read one more row than what actually is
    // needed, so switch to emulated edge one pixel sooner vertically
    // (!!my * 5) than horizontally (!!mx * 4).
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
                                 160, ref_stride,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        ref_stride = 160;
    }
    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
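
/*
 * Chroma counterpart of mc_luma_unscaled(): the 1/8-pel luma motion vector
 * is converted to 1/16-pel units for subsampled planes, and the U and V
 * planes are filtered with the same parameters, each with its own source
 * stride.
 */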
static av_always_inline void mc_chroma_unscaled(VP9TileData *td, vp9_mc_func (*mc)[2],
                                                uint8_t *dst_u, uint8_t *dst_v,
                                                ptrdiff_t dst_stride,
                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                                ThreadFrame *ref_frame,
                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                                int bw, int bh, int w, int h, int bytesperpixel)
{
    VP9Context *s = td->s;
    int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;

    y += my >> 4;
    x += mx >> 4;
    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;
    mx &= 15;
    my &= 15;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
    // The arm/aarch64 _hv filters read one more row than what actually is
    // needed, so switch to emulated edge one pixel sooner vertically
    // (!!my * 5) than horizontally (!!mx * 4).
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
                                 160, src_stride_u,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);

        s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
                                 160, src_stride_v,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    }
}
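
/*
 * vp9_mc_template.c is included twice below; the FN()/BYTES_PER_PIXEL
 * macros expand it into the unscaled inter_pred_8bpp() and
 * inter_pred_16bpp() helpers called from inter_recon().
 */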
#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
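
/*
 * Luma motion compensation for references whose dimensions differ from the
 * current frame: block position and motion vector are rescaled with the
 * 14-bit fixed-point 'scale' factors, and the scaled sub-pel filters (smc)
 * are used instead of the unscaled mc functions.
 */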
static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
                                            vp9_mc_func (*mc)[2],
                                            uint8_t *dst, ptrdiff_t dst_stride,
                                            const uint8_t *ref, ptrdiff_t ref_stride,
                                            ThreadFrame *ref_frame,
                                            ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                            int px, int py, int pw, int ph,
                                            int bw, int bh, int w, int h, int bytesperpixel,
                                            const uint16_t *scale, const uint8_t *step)
{
    VP9Context *s = td->s;
    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
        mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
    } else {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
        int mx, my;
        int refbw_m1, refbh_m1;
        int th;
        VP56mv mv;

        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
        // BUG libvpx seems to scale the two components separately. This introduces
        // rounding errors but we have to reproduce them to be exactly compatible
        // with the output from libvpx...
        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
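        // Worked example with illustrative numbers, assuming scale[] holds
        // (ref_size << 14) / cur_size as set up by the frame header code:
        // with scale[0] == 8192 (reference half as wide), a block at x == 32
        // with mv.x == 16 gives
        //   mx = ((16 * 2) * 8192 >> 14) + ((32 * 16) * 8192 >> 14) = 16 + 256,
        // so x becomes (16 + 256) >> 4 = 17 and the sub-pel phase is mx & 15 == 0.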

        y = my >> 4;
        x = mx >> 4;
        ref += y * ref_stride + x * bytesperpixel;
        mx &= 15;
        my &= 15;
        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
        // we use +7 because the last 7 pixels of each sbrow can be changed in
        // the longest loopfilter of the next sbrow
        th = (y + refbh_m1 + 4 + 7) >> 6;
        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
        // The arm/aarch64 _hv filters read one more row than what actually is
        // needed, so switch to emulated edge one pixel sooner vertically
        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     ref - 3 * ref_stride - 3 * bytesperpixel,
                                     288, ref_stride,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            ref_stride = 288;
        }
        smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
    }
}
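
/*
 * Chroma counterpart of mc_luma_scaled().  Note the separate clipping and
 * scaling of the two MV components for subsampled planes, which reproduces
 * a libvpx rounding quirk (see the BUG references below).
 */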
static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
                                              vp9_mc_func (*mc)[2],
                                              uint8_t *dst_u, uint8_t *dst_v,
                                              ptrdiff_t dst_stride,
                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                              ThreadFrame *ref_frame,
                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                              int px, int py, int pw, int ph,
                                              int bw, int bh, int w, int h, int bytesperpixel,
                                              const uint16_t *scale, const uint8_t *step)
{
    VP9Context *s = td->s;
    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
        mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
                           ref_v, src_stride_v, ref_frame,
                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
    } else {
        int mx, my;
        int refbw_m1, refbh_m1;
        int th;
        VP56mv mv;

        if (s->ss_h) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
            mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
        } else {
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
            mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
        }
        if (s->ss_v) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
            my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
        } else {
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
            my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
        }
#undef scale_mv
        y = my >> 4;
        x = mx >> 4;
        ref_u += y * src_stride_u + x * bytesperpixel;
        ref_v += y * src_stride_v + x * bytesperpixel;
        mx &= 15;
        my &= 15;
        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
        // we use +7 because the last 7 pixels of each sbrow can be changed in
        // the longest loopfilter of the next sbrow
        th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
        // The arm/aarch64 _hv filters read one more row than what actually is
        // needed, so switch to emulated edge one pixel sooner vertically
        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     ref_u - 3 * src_stride_u - 3 * bytesperpixel,
                                     288, src_stride_u,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     ref_v - 3 * src_stride_v - 3 * bytesperpixel,
                                     288, src_stride_v,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
        } else {
            smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
            smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
        }
    }
}
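
/*
 * Second pair of template expansions: with SCALED set to 1, the same
 * vp9_mc_template.c generates inter_pred_scaled_8bpp() and
 * inter_pred_scaled_16bpp() on top of the *_scaled MC helpers above.
 */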
#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
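
/*
 * Reconstruct an inter-coded block: run motion compensation (scaled or
 * unscaled, 8 or 16 bpp) for the block's reference(s), then, unless the
 * block is skipped, add the inverse-transformed residuals for the luma and
 * both chroma planes.
 */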
static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
{
    VP9Context *s = td->s;
    VP9Block *b = td->b;
    int row = td->row, col = td->col;

    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(td);
        } else {
            inter_pred_scaled_16bpp(td);
        }
    } else {
        if (bytesperpixel == 1) {
            inter_pred_8bpp(td);
        } else {
            inter_pred_16bpp(td);
        }
    }

    if (!b->skip) {
        /* mostly copied intra_recon() */

        int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = td->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
                                                  td->block + 16 * n * bytesperpixel, eob);
            }
            dst += 4 * td->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= s->ss_h;
        end_y >>= s->ss_v;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = td->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
                                                        td->uvblock[p] + 16 * n * bytesperpixel, eob);
                }
                dst += 4 * uvstep1d * td->uv_stride;
            }
        }
    }
}

void ff_vp9_inter_recon_8bpp(VP9TileData *td)
{
    inter_recon(td, 1);
}

void ff_vp9_inter_recon_16bpp(VP9TileData *td)
{
    inter_recon(td, 2);
}