FFmpeg
vp9recon.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/mem_internal.h"
26 
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32 
34  uint8_t *dst_edge, ptrdiff_t stride_edge,
35  uint8_t *dst_inner, ptrdiff_t stride_inner,
36  uint8_t *l, int col, int x, int w,
37  int row, int y, enum TxfmMode tx,
38  int p, int ss_h, int ss_v, int bytesperpixel)
39 {
40  VP9Context *s = td->s;
41  int have_top = row > 0 || y > 0;
42  int have_left = col > td->tile_col_start || x > 0;
43  int have_right = x < w - 1;
44  int bpp = s->s.h.bpp;
45  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
46  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
47  { DC_127_PRED, VERT_PRED } },
48  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
49  { HOR_PRED, HOR_PRED } },
50  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
51  { LEFT_DC_PRED, DC_PRED } },
63  { HOR_UP_PRED, HOR_UP_PRED } },
65  { HOR_PRED, TM_VP8_PRED } },
66  };
67  static const struct {
68  uint8_t needs_left:1;
69  uint8_t needs_top:1;
70  uint8_t needs_topleft:1;
71  uint8_t needs_topright:1;
72  uint8_t invert_left:1;
73  } edges[N_INTRA_PRED_MODES] = {
74  [VERT_PRED] = { .needs_top = 1 },
75  [HOR_PRED] = { .needs_left = 1 },
76  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
77  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
78  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79  .needs_topleft = 1 },
80  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
81  .needs_topleft = 1 },
82  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
83  .needs_topleft = 1 },
84  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
85  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
86  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
87  .needs_topleft = 1 },
88  [LEFT_DC_PRED] = { .needs_left = 1 },
89  [TOP_DC_PRED] = { .needs_top = 1 },
90  [DC_128_PRED] = { 0 },
91  [DC_127_PRED] = { 0 },
92  [DC_129_PRED] = { 0 }
93  };
94 
95  av_assert2(mode >= 0 && mode < 10);
96  mode = mode_conv[mode][have_left][have_top];
97  if (edges[mode].needs_top) {
98  uint8_t *top, *topleft;
99  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
100  int n_px_need_tr = 0;
101 
102  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
103  n_px_need_tr = 4;
104 
105  // if top of sb64-row, use s->intra_pred_data[] instead of
106  // dst[-stride] for intra prediction (it contains pre- instead of
107  // post-loopfilter data)
108  if (have_top) {
109  top = !(row & 7) && !y ?
110  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
111  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
112  if (have_left)
113  topleft = !(row & 7) && !y ?
114  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
115  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
116  &dst_inner[-stride_inner];
117  }
118 
119  if (have_top &&
120  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
121  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
122  n_px_need + n_px_need_tr <= n_px_have) {
123  *a = top;
124  } else {
125  if (have_top) {
126  if (n_px_need <= n_px_have) {
127  memcpy(*a, top, n_px_need * bytesperpixel);
128  } else {
129 #define memset_bpp(c, i1, v, i2, num) do { \
130  if (bytesperpixel == 1) { \
131  memset(&(c)[(i1)], (v)[(i2)], (num)); \
132  } else { \
133  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
134  for (n = 0; n < (num); n++) { \
135  AV_WN16A(&(c)[((i1) + n) * 2], val); \
136  } \
137  } \
138 } while (0)
139  memcpy(*a, top, n_px_have * bytesperpixel);
140  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
141  }
142  } else {
143 #define memset_val(c, val, num) do { \
144  if (bytesperpixel == 1) { \
145  memset((c), (val), (num)); \
146  } else { \
147  int n; \
148  for (n = 0; n < (num); n++) { \
149  AV_WN16A(&(c)[n * 2], (val)); \
150  } \
151  } \
152 } while (0)
153  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
154  }
155  if (edges[mode].needs_topleft) {
156  if (have_left && have_top) {
157 #define assign_bpp(c, i1, v, i2) do { \
158  if (bytesperpixel == 1) { \
159  (c)[(i1)] = (v)[(i2)]; \
160  } else { \
161  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
162  } \
163 } while (0)
164  assign_bpp(*a, -1, topleft, -1);
165  } else {
166 #define assign_val(c, i, v) do { \
167  if (bytesperpixel == 1) { \
168  (c)[(i)] = (v); \
169  } else { \
170  AV_WN16A(&(c)[(i) * 2], (v)); \
171  } \
172 } while (0)
173  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
174  }
175  }
176  if (tx == TX_4X4 && edges[mode].needs_topright) {
177  if (have_top && have_right &&
178  n_px_need + n_px_need_tr <= n_px_have) {
179  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
180  } else {
181  memset_bpp(*a, 4, *a, 3, 4);
182  }
183  }
184  }
185  }
186  if (edges[mode].needs_left) {
187  if (have_left) {
188  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
189  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
190  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
191 
192  if (edges[mode].invert_left) {
193  if (n_px_need <= n_px_have) {
194  for (i = 0; i < n_px_need; i++)
195  assign_bpp(l, i, &dst[i * stride], -1);
196  } else {
197  for (i = 0; i < n_px_have; i++)
198  assign_bpp(l, i, &dst[i * stride], -1);
199  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
200  }
201  } else {
202  if (n_px_need <= n_px_have) {
203  for (i = 0; i < n_px_need; i++)
204  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
205  } else {
206  for (i = 0; i < n_px_have; i++)
207  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
208  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
209  }
210  }
211  } else {
212  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
213  }
214  }
215 
216  return mode;
217 }
218 
219 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
220  ptrdiff_t uv_off, int bytesperpixel)
221 {
222  VP9Context *s = td->s;
223  VP9Block *b = td->b;
224  int row = td->row, col = td->col;
225  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
226  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
227  int end_x = FFMIN(2 * (s->cols - col), w4);
228  int end_y = FFMIN(2 * (s->rows - row), h4);
229  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
230  int uvstep1d = 1 << b->uvtx, p;
231  uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
232  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
233  LOCAL_ALIGNED_32(uint8_t, l, [64]);
234 
235  for (n = 0, y = 0; y < end_y; y += step1d) {
236  uint8_t *ptr = dst, *ptr_r = dst_r;
237  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
238  ptr_r += 4 * step1d * bytesperpixel, n += step) {
239  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
240  y * 2 + x : 0];
241  uint8_t *a = &a_buf[32];
242  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
243  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
244 
245  mode = check_intra_mode(td, mode, &a, ptr_r,
246  s->s.frames[CUR_FRAME].tf.f->linesize[0],
247  ptr, td->y_stride, l,
248  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
249  s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
250  if (eob)
251  s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
252  td->block + 16 * n * bytesperpixel, eob);
253  }
254  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
255  dst += 4 * step1d * td->y_stride;
256  }
257 
258  // U/V
259  w4 >>= s->ss_h;
260  end_x >>= s->ss_h;
261  end_y >>= s->ss_v;
262  step = 1 << (b->uvtx * 2);
263  for (p = 0; p < 2; p++) {
264  dst = td->dst[1 + p];
265  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
266  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
267  uint8_t *ptr = dst, *ptr_r = dst_r;
268  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
269  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
270  int mode = b->uvmode;
271  uint8_t *a = &a_buf[32];
272  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
273 
274  mode = check_intra_mode(td, mode, &a, ptr_r,
275  s->s.frames[CUR_FRAME].tf.f->linesize[1],
276  ptr, td->uv_stride, l, col, x, w4, row, y,
277  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
278  s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
279  if (eob)
280  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
281  td->uvblock[p] + 16 * n * bytesperpixel, eob);
282  }
283  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
284  dst += 4 * uvstep1d * td->uv_stride;
285  }
286  }
287 }
288 
289 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
290 {
291  intra_recon(td, y_off, uv_off, 1);
292 }
293 
294 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
295 {
296  intra_recon(td, y_off, uv_off, 2);
297 }
298 
300  uint8_t *dst, ptrdiff_t dst_stride,
301  const uint8_t *ref, ptrdiff_t ref_stride,
302  ThreadFrame *ref_frame,
303  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
304  int bw, int bh, int w, int h, int bytesperpixel)
305 {
306  VP9Context *s = td->s;
307  int mx = mv->x, my = mv->y, th;
308 
309  y += my >> 3;
310  x += mx >> 3;
311  ref += y * ref_stride + x * bytesperpixel;
312  mx &= 7;
313  my &= 7;
314  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
315  // we use +7 because the last 7 pixels of each sbrow can be changed in
316  // the longest loopfilter of the next sbrow
317  th = (y + bh + 4 * !!my + 7) >> 6;
318  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
319  // The arm/aarch64 _hv filters read one more row than what actually is
320  // needed, so switch to emulated edge one pixel sooner vertically
321  // (!!my * 5) than horizontally (!!mx * 4).
322  if (x < !!mx * 3 || y < !!my * 3 ||
323  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
324  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
325  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
326  160, ref_stride,
327  bw + !!mx * 7, bh + !!my * 7,
328  x - !!mx * 3, y - !!my * 3, w, h);
329  ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
330  ref_stride = 160;
331  }
332  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
333 }
334 
336  uint8_t *dst_u, uint8_t *dst_v,
337  ptrdiff_t dst_stride,
338  const uint8_t *ref_u, ptrdiff_t src_stride_u,
339  const uint8_t *ref_v, ptrdiff_t src_stride_v,
340  ThreadFrame *ref_frame,
341  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
342  int bw, int bh, int w, int h, int bytesperpixel)
343 {
344  VP9Context *s = td->s;
345  int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
346 
347  y += my >> 4;
348  x += mx >> 4;
349  ref_u += y * src_stride_u + x * bytesperpixel;
350  ref_v += y * src_stride_v + x * bytesperpixel;
351  mx &= 15;
352  my &= 15;
353  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
354  // we use +7 because the last 7 pixels of each sbrow can be changed in
355  // the longest loopfilter of the next sbrow
356  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
357  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
358  // The arm/aarch64 _hv filters read one more row than what actually is
359  // needed, so switch to emulated edge one pixel sooner vertically
360  // (!!my * 5) than horizontally (!!mx * 4).
361  if (x < !!mx * 3 || y < !!my * 3 ||
362  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
363  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
364  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
365  160, src_stride_u,
366  bw + !!mx * 7, bh + !!my * 7,
367  x - !!mx * 3, y - !!my * 3, w, h);
368  ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
369  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
370 
371  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
372  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
373  160, src_stride_v,
374  bw + !!mx * 7, bh + !!my * 7,
375  x - !!mx * 3, y - !!my * 3, w, h);
376  ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
377  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
378  } else {
379  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
380  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
381  }
382 }
383 
384 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
385  px, py, pw, ph, bw, bh, w, h, i) \
386  mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
387  mv, bw, bh, w, h, bytesperpixel)
388 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
389  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
390  mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
391  row, col, mv, bw, bh, w, h, bytesperpixel)
392 #define SCALED 0
393 #define FN(x) x##_8bpp
394 #define BYTES_PER_PIXEL 1
395 #include "vp9_mc_template.c"
396 #undef FN
397 #undef BYTES_PER_PIXEL
398 #define FN(x) x##_16bpp
399 #define BYTES_PER_PIXEL 2
400 #include "vp9_mc_template.c"
401 #undef mc_luma_dir
402 #undef mc_chroma_dir
403 #undef FN
404 #undef BYTES_PER_PIXEL
405 #undef SCALED
406 
408  vp9_mc_func (*mc)[2],
409  uint8_t *dst, ptrdiff_t dst_stride,
410  const uint8_t *ref, ptrdiff_t ref_stride,
411  ThreadFrame *ref_frame,
412  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
413  int px, int py, int pw, int ph,
414  int bw, int bh, int w, int h, int bytesperpixel,
415  const uint16_t *scale, const uint8_t *step)
416 {
417  VP9Context *s = td->s;
418  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
419  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
420  mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
421  y, x, in_mv, bw, bh, w, h, bytesperpixel);
422  } else {
423 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
424  int mx, my;
425  int refbw_m1, refbh_m1;
426  int th;
427  VP56mv mv;
428 
429  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
430  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
431  // BUG libvpx seems to scale the two components separately. This introduces
432  // rounding errors but we have to reproduce them to be exactly compatible
433  // with the output from libvpx...
434  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
435  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
436 
437  y = my >> 4;
438  x = mx >> 4;
439  ref += y * ref_stride + x * bytesperpixel;
440  mx &= 15;
441  my &= 15;
442  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
443  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
444  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
445  // we use +7 because the last 7 pixels of each sbrow can be changed in
446  // the longest loopfilter of the next sbrow
447  th = (y + refbh_m1 + 4 + 7) >> 6;
448  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
449  // The arm/aarch64 _hv filters read one more row than what actually is
450  // needed, so switch to emulated edge one pixel sooner vertically
451  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
452  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
453  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
454  ref - 3 * ref_stride - 3 * bytesperpixel,
455  288, ref_stride,
456  refbw_m1 + 8, refbh_m1 + 8,
457  x - 3, y - 3, w, h);
458  ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
459  ref_stride = 288;
460  }
461  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
462  }
463 }
464 
466  vp9_mc_func (*mc)[2],
467  uint8_t *dst_u, uint8_t *dst_v,
468  ptrdiff_t dst_stride,
469  const uint8_t *ref_u, ptrdiff_t src_stride_u,
470  const uint8_t *ref_v, ptrdiff_t src_stride_v,
471  ThreadFrame *ref_frame,
472  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
473  int px, int py, int pw, int ph,
474  int bw, int bh, int w, int h, int bytesperpixel,
475  const uint16_t *scale, const uint8_t *step)
476 {
477  VP9Context *s = td->s;
478  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
479  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
480  mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
481  ref_v, src_stride_v, ref_frame,
482  y, x, in_mv, bw, bh, w, h, bytesperpixel);
483  } else {
484  int mx, my;
485  int refbw_m1, refbh_m1;
486  int th;
487  VP56mv mv;
488 
489  if (s->ss_h) {
490  // BUG https://code.google.com/p/webm/issues/detail?id=820
491  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
492  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
493  } else {
494  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
495  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
496  }
497  if (s->ss_v) {
498  // BUG https://code.google.com/p/webm/issues/detail?id=820
499  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
500  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
501  } else {
502  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
503  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
504  }
505 #undef scale_mv
506  y = my >> 4;
507  x = mx >> 4;
508  ref_u += y * src_stride_u + x * bytesperpixel;
509  ref_v += y * src_stride_v + x * bytesperpixel;
510  mx &= 15;
511  my &= 15;
512  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
513  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
514  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
515  // we use +7 because the last 7 pixels of each sbrow can be changed in
516  // the longest loopfilter of the next sbrow
517  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
518  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
519  // The arm/aarch64 _hv filters read one more row than what actually is
520  // needed, so switch to emulated edge one pixel sooner vertically
521  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
522  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
523  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
524  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
525  288, src_stride_u,
526  refbw_m1 + 8, refbh_m1 + 8,
527  x - 3, y - 3, w, h);
528  ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
529  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
530 
531  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
532  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
533  288, src_stride_v,
534  refbw_m1 + 8, refbh_m1 + 8,
535  x - 3, y - 3, w, h);
536  ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
537  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
538  } else {
539  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
540  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
541  }
542  }
543 }
544 
545 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
546  px, py, pw, ph, bw, bh, w, h, i) \
547  mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
548  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
549  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
550 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
551  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
552  mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
553  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
554  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
555 #define SCALED 1
556 #define FN(x) x##_scaled_8bpp
557 #define BYTES_PER_PIXEL 1
558 #include "vp9_mc_template.c"
559 #undef FN
560 #undef BYTES_PER_PIXEL
561 #define FN(x) x##_scaled_16bpp
562 #define BYTES_PER_PIXEL 2
563 #include "vp9_mc_template.c"
564 #undef mc_luma_dir
565 #undef mc_chroma_dir
566 #undef FN
567 #undef BYTES_PER_PIXEL
568 #undef SCALED
569 
570 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
571 {
572  VP9Context *s = td->s;
573  VP9Block *b = td->b;
574  int row = td->row, col = td->col;
575 
576  if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
577  (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
578  if (!s->td->error_info) {
579  s->td->error_info = AVERROR_INVALIDDATA;
580  av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
581  "reference frame has invalid dimensions\n");
582  }
583  return;
584  }
585 
586  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
587  if (bytesperpixel == 1) {
588  inter_pred_scaled_8bpp(td);
589  } else {
590  inter_pred_scaled_16bpp(td);
591  }
592  } else {
593  if (bytesperpixel == 1) {
594  inter_pred_8bpp(td);
595  } else {
596  inter_pred_16bpp(td);
597  }
598  }
599 
600  if (!b->skip) {
601  /* mostly copied intra_recon() */
602 
603  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
604  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
605  int end_x = FFMIN(2 * (s->cols - col), w4);
606  int end_y = FFMIN(2 * (s->rows - row), h4);
607  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
608  int uvstep1d = 1 << b->uvtx, p;
609  uint8_t *dst = td->dst[0];
610 
611  // y itxfm add
612  for (n = 0, y = 0; y < end_y; y += step1d) {
613  uint8_t *ptr = dst;
614  for (x = 0; x < end_x; x += step1d,
615  ptr += 4 * step1d * bytesperpixel, n += step) {
616  int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
617 
618  if (eob)
619  s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
620  td->block + 16 * n * bytesperpixel, eob);
621  }
622  dst += 4 * td->y_stride * step1d;
623  }
624 
625  // uv itxfm add
626  end_x >>= s->ss_h;
627  end_y >>= s->ss_v;
628  step = 1 << (b->uvtx * 2);
629  for (p = 0; p < 2; p++) {
630  dst = td->dst[p + 1];
631  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
632  uint8_t *ptr = dst;
633  for (x = 0; x < end_x; x += uvstep1d,
634  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
635  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
636 
637  if (eob)
638  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
639  td->uvblock[p] + 16 * n * bytesperpixel, eob);
640  }
641  dst += 4 * uvstep1d * td->uv_stride;
642  }
643  }
644  }
645 }
646 
648 {
649  inter_recon(td, 1);
650 }
651 
653 {
654  inter_recon(td, 2);
655 }
stride
int stride
Definition: mace.c:144
td
#define td
Definition: regdef.h:70
av_clip
#define av_clip
Definition: common.h:122
VP56mv::x
int16_t x
Definition: vp56.h:69
mem_internal.h
DC_128_PRED
@ DC_128_PRED
Definition: vp9.h:58
mv
static const int8_t mv[256][2]
Definition: 4xm.c:78
TM_VP8_PRED
@ TM_VP8_PRED
Definition: vp9.h:55
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVFrame::width
int width
Definition: frame.h:376
w
uint8_t w
Definition: llviddspenc.c:39
internal.h
DC_PRED
@ DC_PRED
Definition: vp9.h:48
b
#define b
Definition: input.c:41
VERT_LEFT_PRED
@ VERT_LEFT_PRED
Definition: vp9.h:53
inter_recon
static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
Definition: vp9recon.c:570
ff_thread_await_progress
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before ff_thread_await_progress() has been called on them. reget_buffer() and buffer age optimizations no longer work. *The contents of buffers must not be written to after ff_thread_report_progress() has been called on them. This includes draw_edges(). Porting codecs to frame threading
ff_vp9_intra_recon_16bpp
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:294
ThreadFrame::f
AVFrame * f
Definition: thread.h:35
VP9Block
Definition: vp9dec.h:82
DC_127_PRED
@ DC_127_PRED
Definition: vp9.h:59
VERT_PRED
@ VERT_PRED
Definition: vp9.h:46
assign_val
#define assign_val(c, i, v)
check_intra_mode
static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p, int ss_h, int ss_v, int bytesperpixel)
Definition: vp9recon.c:33
DIAG_DOWN_RIGHT_PRED
@ DIAG_DOWN_RIGHT_PRED
Definition: vp9.h:50
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
HOR_PRED
@ HOR_PRED
Definition: vp9.h:47
vp9_scaled_mc_func
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:35
s
#define s(width, name)
Definition: cbs_vp9.c:257
vp9data.h
LEFT_DC_PRED
@ LEFT_DC_PRED
Definition: vp9.h:56
VP56mv::y
int16_t y
Definition: vp56.h:70
VP56mv
Definition: vp56.h:68
memset_val
#define memset_val(c, val, num)
ff_vp9_inter_recon_16bpp
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:652
assign_bpp
#define assign_bpp(c, i1, v, i2)
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
VP9Context
Definition: vp9dec.h:94
mc_chroma_scaled
static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:465
TX_8X8
@ TX_8X8
Definition: vp9.h:29
vp9_mc_template.c
TxfmMode
TxfmMode
Definition: vp9.h:27
DCT_DCT
@ DCT_DCT
Definition: vp9.h:38
TxfmType
TxfmType
Definition: vp9.h:37
N_INTRA_PRED_MODES
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
REF_INVALID_SCALE
#define REF_INVALID_SCALE
Definition: vp9dec.h:40
FFMAX
#define FFMAX(a, b)
Definition: common.h:103
mc_luma_unscaled
static av_always_inline void mc_luma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:299
VERT_RIGHT_PRED
@ VERT_RIGHT_PRED
Definition: vp9.h:51
BS_8x8
@ BS_8x8
Definition: vp9shared.h:87
scale_mv
#define scale_mv(n, dim)
TX_4X4
@ TX_4X4
Definition: vp9.h:28
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
th
#define th
Definition: regdef.h:75
mc_chroma_unscaled
static av_always_inline void mc_chroma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:335
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
i
int i
Definition: input.c:407
DC_129_PRED
@ DC_129_PRED
Definition: vp9.h:60
ff_vp9_intra_txfm_type
enum TxfmType ff_vp9_intra_txfm_type[14]
Definition: vp9data.c:437
av_always_inline
#define av_always_inline
Definition: attributes.h:49
uint8_t
uint8_t
Definition: audio_convert.c:194
mc_luma_scaled
static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:407
avcodec.h
VP9TileData
Definition: vp9dec.h:164
AVFrame::height
int height
Definition: frame.h:376
ThreadFrame
Definition: thread.h:34
HOR_UP_PRED
@ HOR_UP_PRED
Definition: vp9.h:54
mode
mode
Definition: ebur128.h:83
ff_vp9_bwh_tab
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
AV_RN16A
#define AV_RN16A(p)
Definition: intreadwrite.h:522
vp9_mc_func
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:32
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
HOR_DOWN_PRED
@ HOR_DOWN_PRED
Definition: vp9.h:52
vp9dec.h
CUR_FRAME
#define CUR_FRAME
Definition: vp9shared.h:163
TOP_DC_PRED
@ TOP_DC_PRED
Definition: vp9.h:57
videodsp.h
DIAG_DOWN_LEFT_PRED
@ DIAG_DOWN_LEFT_PRED
Definition: vp9.h:49
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
memset_bpp
#define memset_bpp(c, i1, v, i2, num)
ff_vp9_inter_recon_8bpp
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:647
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
h
h
Definition: vp9dsp_template.c:2038
intra_recon
static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9recon.c:219
ff_vp9_intra_recon_8bpp
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:289
mc
#define mc
Definition: vf_colormatrix.c:102