FFmpeg
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevcdec.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
30  GetBitContext *gb, int pcm_bit_depth)
31 {
32  int x, y;
33  pixel *dst = (pixel *)_dst;
34 
35  stride /= sizeof(pixel);
36 
37  for (y = 0; y < height; y++) {
38  for (x = 0; x < width; x++)
39  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
40  dst += stride;
41  }
42 }
43 
44 static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res,
45  ptrdiff_t stride, int size)
46 {
47  int x, y;
48  pixel *dst = (pixel *)_dst;
49 
50  stride /= sizeof(pixel);
51 
52  for (y = 0; y < size; y++) {
53  for (x = 0; x < size; x++) {
54  dst[x] = av_clip_pixel(dst[x] + *res);
55  res++;
56  }
57  dst += stride;
58  }
59 }
60 
61 static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res,
62  ptrdiff_t stride)
63 {
64  FUNC(add_residual)(_dst, res, stride, 4);
65 }
66 
67 static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res,
68  ptrdiff_t stride)
69 {
70  FUNC(add_residual)(_dst, res, stride, 8);
71 }
72 
73 static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res,
74  ptrdiff_t stride)
75 {
76  FUNC(add_residual)(_dst, res, stride, 16);
77 }
78 
79 static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res,
80  ptrdiff_t stride)
81 {
82  FUNC(add_residual)(_dst, res, stride, 32);
83 }
84 
85 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
86 {
87  int16_t *coeffs = (int16_t *) _coeffs;
88  int x, y;
89  int size = 1 << log2_size;
90 
91  if (mode) {
92  coeffs += size;
93  for (y = 0; y < size - 1; y++) {
94  for (x = 0; x < size; x++)
95  coeffs[x] += coeffs[x - size];
96  coeffs += size;
97  }
98  } else {
99  for (y = 0; y < size; y++) {
100  for (x = 1; x < size; x++)
101  coeffs[x] += coeffs[x - 1];
102  coeffs += size;
103  }
104  }
105 }
106 
107 static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
108 {
109  int shift = 15 - BIT_DEPTH - log2_size;
110  int x, y;
111  int size = 1 << log2_size;
112 
113  if (shift > 0) {
114  int offset = 1 << (shift - 1);
115  for (y = 0; y < size; y++) {
116  for (x = 0; x < size; x++) {
117  *coeffs = (*coeffs + offset) >> shift;
118  coeffs++;
119  }
120  }
121  } else {
122  for (y = 0; y < size; y++) {
123  for (x = 0; x < size; x++) {
124  *coeffs = *(uint16_t*)coeffs << -shift;
125  coeffs++;
126  }
127  }
128  }
129 }
130 
/*
 * Assignment policies handed to the transform kernels below.
 * SET stores the value unchanged; SCALE adds `add`, shifts right by `shift`
 * (both taken from the scope where the macro is expanded) and clips the
 * result with av_clip_int16().
 */
#define SET(lhs, value)   (lhs) = (value)
#define SCALE(lhs, value) (lhs) = av_clip_int16(((value) + add) >> shift)
133 
/*
 * One 4-point pass of the 4x4 luma transform (multipliers 29, 55, 74).
 * Reads src[0..3 * step] and writes dst[0..3 * step] through `assign`.
 * All four inputs are loaded before anything is stored, so dst may be the
 * same array as src.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        const int s0 = src[0 * step];                                   \
        const int s1 = src[1 * step];                                   \
        const int s2 = src[2 * step];                                   \
        const int s3 = src[3 * step];                                   \
        const int sum02  = s0 + s2;                                     \
        const int sum23  = s2 + s3;                                     \
        const int diff03 = s0 - s3;                                     \
        const int mid    = 74 * s1;                                     \
                                                                        \
        assign(dst[0 * step], 29 * sum02 + 55 * sum23 + mid);           \
        assign(dst[1 * step], 55 * diff03 - 29 * sum23 + mid);          \
        assign(dst[2 * step], 74 * (s0 - s2 + s3));                     \
        assign(dst[3 * step], 55 * sum02 + 29 * diff03 - mid);          \
    } while (0)
148 
149 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
150 {
151  int i;
152  int shift = 7;
153  int add = 1 << (shift - 1);
154  int16_t *src = coeffs;
155 
156  for (i = 0; i < 4; i++) {
157  TR_4x4_LUMA(src, src, 4, SCALE);
158  src++;
159  }
160 
161  shift = 20 - BIT_DEPTH;
162  add = 1 << (shift - 1);
163  for (i = 0; i < 4; i++) {
164  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
165  coeffs += 4;
166  }
167 }
168 
169 #undef TR_4x4_LUMA
170 
/*
 * 4-point butterfly (multipliers 64, 83, 36).
 * Reads src[0..3 * sstep], writes dst[0..3 * dstep] through `assign`.
 * The `end` argument is accepted for signature parity with the larger
 * kernels and is not used here. All terms are computed before the first
 * store, so dst may alias src.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                         \
    do {                                                                  \
        const int even_add = 64 * src[0 * sstep] + 64 * src[2 * sstep];   \
        const int even_sub = 64 * src[0 * sstep] - 64 * src[2 * sstep];   \
        const int odd_add  = 83 * src[1 * sstep] + 36 * src[3 * sstep];   \
        const int odd_sub  = 36 * src[1 * sstep] - 83 * src[3 * sstep];   \
                                                                          \
        assign(dst[0 * dstep], even_add + odd_add);                       \
        assign(dst[3 * dstep], even_add - odd_add);                       \
        assign(dst[1 * dstep], even_sub + odd_sub);                       \
        assign(dst[2 * dstep], even_sub - odd_sub);                       \
    } while (0)
183 
/*
 * 8-point butterfly.
 * The even half comes from TR_4 applied to every second input; the odd half
 * accumulates transform[4 * j][i] * src[j * sstep] over odd j below `end`.
 * Outputs i and 7 - i are the sum and difference of the two halves.
 * Relies on `transform`, TR_4 and SET being visible where it is expanded.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                         \
    do {                                                                  \
        int k8, m8;                                                       \
        int even8[4];                                                     \
        int odd8[4] = { 0 };                                              \
                                                                          \
        TR_4(even8, src, 1, 2 * sstep, SET, 4);                           \
        for (k8 = 0; k8 < 4; k8++)                                        \
            for (m8 = 1; m8 < end; m8 += 2)                               \
                odd8[k8] += transform[4 * m8][k8] * src[m8 * sstep];      \
                                                                          \
        for (k8 = 0; k8 < 4; k8++) {                                      \
            assign(dst[k8 * dstep], even8[k8] + odd8[k8]);                \
            assign(dst[(7 - k8) * dstep], even8[k8] - odd8[k8]);          \
        }                                                                 \
    } while (0)
199 
/*
 * 16-point butterfly.
 * The even half comes from TR_8 applied to every second input; the odd half
 * accumulates transform[2 * j][i] * src[j * sstep] over odd j below `end`.
 * Outputs i and 15 - i are the sum and difference of the two halves.
 * Relies on `transform`, TR_8 and SET being visible where it is expanded.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                        \
    do {                                                                  \
        int k16, m16;                                                     \
        int even16[8];                                                    \
        int odd16[8] = { 0 };                                             \
                                                                          \
        TR_8(even16, src, 1, 2 * sstep, SET, 8);                          \
        for (k16 = 0; k16 < 8; k16++)                                     \
            for (m16 = 1; m16 < end; m16 += 2)                            \
                odd16[k16] += transform[2 * m16][k16] * src[m16 * sstep]; \
                                                                          \
        for (k16 = 0; k16 < 8; k16++) {                                   \
            assign(dst[k16 * dstep], even16[k16] + odd16[k16]);           \
            assign(dst[(15 - k16) * dstep], even16[k16] - odd16[k16]);    \
        }                                                                 \
    } while (0)
215 
/*
 * 32-point butterfly.
 * The even half comes from TR_16 applied to every second input (with the
 * limit halved); the odd half accumulates transform[j][i] * src[j * sstep]
 * over odd j below `end`. Outputs i and 31 - i are the sum and difference
 * of the two halves.
 * Relies on `transform`, TR_16 and SET being visible where it is expanded.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                        \
    do {                                                                  \
        int k32, m32;                                                     \
        int even32[16];                                                   \
        int odd32[16] = { 0 };                                            \
                                                                          \
        TR_16(even32, src, 1, 2 * sstep, SET, end / 2);                   \
        for (k32 = 0; k32 < 16; k32++)                                    \
            for (m32 = 1; m32 < end; m32 += 2)                            \
                odd32[k32] += transform[m32][k32] * src[m32 * sstep];     \
                                                                          \
        for (k32 = 0; k32 < 16; k32++) {                                  \
            assign(dst[k32 * dstep], even32[k32] + odd32[k32]);           \
            assign(dst[(31 - k32) * dstep], even32[k32] - odd32[k32]);    \
        }                                                                 \
    } while (0)
231 
/*
 * Per-size local declarations for IDCT().
 * The 4-point variant declares only `limit2`; the 8/16/32-point variants
 * declare both `limit` and `limit2`. Both are derived from the enclosing
 * function's `col_limit` and capped at the block size N.
 */
#define IDCT_VAR4(N)                                              \
    int limit2 = FFMIN(col_limit + 4, N)
#define IDCT_VAR8(N)                                              \
    int limit2 = FFMIN(col_limit + 4, N);                         \
    int limit  = FFMIN(col_limit, N)
#define IDCT_VAR16(N) IDCT_VAR8(N)
#define IDCT_VAR32(N) IDCT_VAR8(N)
239 
/*
 * Define idct_HxH(coeffs, col_limit): an in-place two-pass transform.
 *
 * Pass 1 runs TR_H down each of the H columns (element step H) with a
 * shift of 7; `limit2` is lowered by 4 after every fourth column (except
 * column 0) while it is still below H. Pass 2 runs TR_H along each of the
 * H rows (element step 1) with a shift of 20 - BIT_DEPTH, bounded by
 * `limit`. SCALE reads the local variables `shift` and `add`.
 *
 * For H == 4, IDCT_VAR4 declares no `limit`; that is fine because TR_4
 * never expands its `end` argument.
 */
#define IDCT(H)                                                   \
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
                                        int col_limit)            \
{                                                                 \
    int16_t *column = coeffs;                                     \
    int16_t *line   = coeffs;                                     \
    int shift = 7;                                                \
    int add   = 1 << (shift - 1);                                 \
    int n;                                                        \
    IDCT_VAR ## H(H);                                             \
                                                                  \
    for (n = 0; n < H; n++, column++) {                           \
        TR_ ## H(column, column, H, H, SCALE, limit2);            \
        if (n && n % 4 == 0 && limit2 < H)                        \
            limit2 -= 4;                                          \
    }                                                             \
                                                                  \
    shift = 20 - BIT_DEPTH;                                       \
    add   = 1 << (shift - 1);                                     \
    for (n = 0; n < H; n++, line += H)                            \
        TR_ ## H(line, line, 1, 1, SCALE, limit);                 \
}
264 
/*
 * Define idct_HxH_dc(coeffs): derive one value from coeffs[0]
 * (halved with rounding, then rounded and shifted by 14 - BIT_DEPTH)
 * and write it to all H * H entries of the block.
 */
#define IDCT_DC(H)                                                \
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
{                                                                 \
    const int shift = 14 - BIT_DEPTH;                             \
    const int add   = 1 << (shift - 1);                           \
    const int fill  = (((coeffs[0] + 1) >> 1) + add) >> shift;    \
    int n;                                                        \
                                                                  \
    for (n = 0; n < H * H; n++)                                   \
        coeffs[n] = fill;                                         \
}
279 
280 IDCT( 4)
281 IDCT( 8)
282 IDCT(16)
283 IDCT(32)
284 
285 IDCT_DC( 4)
286 IDCT_DC( 8)
287 IDCT_DC(16)
288 IDCT_DC(32)
289 
290 #undef TR_4
291 #undef TR_8
292 #undef TR_16
293 #undef TR_32
294 
295 #undef SET
296 #undef SCALE
297 
298 static void FUNC(sao_band_filter)(uint8_t *_dst, const uint8_t *_src,
299  ptrdiff_t stride_dst, ptrdiff_t stride_src,
300  const int16_t *sao_offset_val, int sao_left_class,
301  int width, int height)
302 {
303  pixel *dst = (pixel *)_dst;
304  const pixel *src = (const pixel *)_src;
305  int offset_table[32] = { 0 };
306  int k, y, x;
307  int shift = BIT_DEPTH - 5;
308 
309  stride_dst /= sizeof(pixel);
310  stride_src /= sizeof(pixel);
311 
312  for (k = 0; k < 4; k++)
313  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
314  for (y = 0; y < height; y++) {
315  for (x = 0; x < width; x++)
316  dst[x] = av_clip_pixel(src[x] + offset_table[(src[x] >> shift) & 31]);
317  dst += stride_dst;
318  src += stride_src;
319  }
320 }
321 
322 #define CMP(a, b) (((a) > (b)) - ((a) < (b)))
323 
324 static void FUNC(sao_edge_filter)(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, const int16_t *sao_offset_val,
325  int eo, int width, int height) {
326 
327  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
328  static const int8_t pos[4][2][2] = {
329  { { -1, 0 }, { 1, 0 } }, // horizontal
330  { { 0, -1 }, { 0, 1 } }, // vertical
331  { { -1, -1 }, { 1, 1 } }, // 45 degree
332  { { 1, -1 }, { -1, 1 } }, // 135 degree
333  };
334  pixel *dst = (pixel *)_dst;
335  const pixel *src = (const pixel *)_src;
336  int a_stride, b_stride;
337  int x, y;
338  ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
339  stride_dst /= sizeof(pixel);
340 
341  a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
342  b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
343  for (y = 0; y < height; y++) {
344  for (x = 0; x < width; x++) {
345  int diff0 = CMP(src[x], src[x + a_stride]);
346  int diff1 = CMP(src[x], src[x + b_stride]);
347  int offset_val = edge_idx[2 + diff0 + diff1];
348  dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
349  }
350  src += stride_src;
351  dst += stride_dst;
352  }
353 }
354 
355 static void FUNC(sao_edge_restore_0)(uint8_t *_dst, const uint8_t *_src,
356  ptrdiff_t stride_dst, ptrdiff_t stride_src, const SAOParams *sao,
357  const int *borders, int _width, int _height,
358  int c_idx, const uint8_t *vert_edge,
359  const uint8_t *horiz_edge, const uint8_t *diag_edge)
360 {
361  int x, y;
362  pixel *dst = (pixel *)_dst;
363  const pixel *src = (const pixel *)_src;
364  const int16_t *sao_offset_val = sao->offset_val[c_idx];
365  int sao_eo_class = sao->eo_class[c_idx];
366  int init_x = 0, width = _width, height = _height;
367 
368  stride_dst /= sizeof(pixel);
369  stride_src /= sizeof(pixel);
370 
371  if (sao_eo_class != SAO_EO_VERT) {
372  if (borders[0]) {
373  int offset_val = sao_offset_val[0];
374  for (y = 0; y < height; y++) {
375  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
376  }
377  init_x = 1;
378  }
379  if (borders[2]) {
380  int offset_val = sao_offset_val[0];
381  int offset = width - 1;
382  for (x = 0; x < height; x++) {
383  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
384  }
385  width--;
386  }
387  }
388  if (sao_eo_class != SAO_EO_HORIZ) {
389  if (borders[1]) {
390  int offset_val = sao_offset_val[0];
391  for (x = init_x; x < width; x++)
392  dst[x] = av_clip_pixel(src[x] + offset_val);
393  }
394  if (borders[3]) {
395  int offset_val = sao_offset_val[0];
396  ptrdiff_t y_stride_dst = stride_dst * (height - 1);
397  ptrdiff_t y_stride_src = stride_src * (height - 1);
398  for (x = init_x; x < width; x++)
399  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
400  height--;
401  }
402  }
403 }
404 
405 static void FUNC(sao_edge_restore_1)(uint8_t *_dst, const uint8_t *_src,
406  ptrdiff_t stride_dst, ptrdiff_t stride_src, const SAOParams *sao,
407  const int *borders, int _width, int _height,
408  int c_idx, const uint8_t *vert_edge,
409  const uint8_t *horiz_edge, const uint8_t *diag_edge)
410 {
411  int x, y;
412  pixel *dst = (pixel *)_dst;
413  const pixel *src = (const pixel *)_src;
414  const int16_t *sao_offset_val = sao->offset_val[c_idx];
415  int sao_eo_class = sao->eo_class[c_idx];
416  int init_x = 0, init_y = 0, width = _width, height = _height;
417 
418  stride_dst /= sizeof(pixel);
419  stride_src /= sizeof(pixel);
420 
421  if (sao_eo_class != SAO_EO_VERT) {
422  if (borders[0]) {
423  int offset_val = sao_offset_val[0];
424  for (y = 0; y < height; y++) {
425  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
426  }
427  init_x = 1;
428  }
429  if (borders[2]) {
430  int offset_val = sao_offset_val[0];
431  int offset = width - 1;
432  for (x = 0; x < height; x++) {
433  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
434  }
435  width--;
436  }
437  }
438  if (sao_eo_class != SAO_EO_HORIZ) {
439  if (borders[1]) {
440  int offset_val = sao_offset_val[0];
441  for (x = init_x; x < width; x++)
442  dst[x] = av_clip_pixel(src[x] + offset_val);
443  init_y = 1;
444  }
445  if (borders[3]) {
446  int offset_val = sao_offset_val[0];
447  ptrdiff_t y_stride_dst = stride_dst * (height - 1);
448  ptrdiff_t y_stride_src = stride_src * (height - 1);
449  for (x = init_x; x < width; x++)
450  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
451  height--;
452  }
453  }
454 
455  {
456  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
457  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
458  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
459  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
460 
461  // Restore pixels that can't be modified
462  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
463  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
464  dst[y*stride_dst] = src[y*stride_src];
465  }
466  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
467  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
468  dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
469  }
470 
471  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
472  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
473  dst[x] = src[x];
474  }
475  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
476  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
477  dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
478  }
479  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
480  dst[0] = src[0];
481  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
482  dst[width-1] = src[width-1];
483  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
484  dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
485  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
486  dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
487 
488  }
489 }
490 
491 #undef CMP
492 
493 ////////////////////////////////////////////////////////////////////////////////
494 //
495 ////////////////////////////////////////////////////////////////////////////////
496 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
497  const uint8_t *_src, ptrdiff_t _srcstride,
498  int height, intptr_t mx, intptr_t my, int width)
499 {
500  int x, y;
501  const pixel *src = (const pixel *)_src;
502  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
503 
504  for (y = 0; y < height; y++) {
505  for (x = 0; x < width; x++)
506  dst[x] = src[x] << (14 - BIT_DEPTH);
507  src += srcstride;
508  dst += MAX_PB_SIZE;
509  }
510 }
511 
512 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
513  int height, intptr_t mx, intptr_t my, int width)
514 {
515  int y;
516  const pixel *src = (const pixel *)_src;
517  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
518  pixel *dst = (pixel *)_dst;
519  ptrdiff_t dststride = _dststride / sizeof(pixel);
520 
521  for (y = 0; y < height; y++) {
522  memcpy(dst, src, width * sizeof(pixel));
523  src += srcstride;
524  dst += dststride;
525  }
526 }
527 
528 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
529  const int16_t *src2,
530  int height, intptr_t mx, intptr_t my, int width)
531 {
532  int x, y;
533  const pixel *src = (const pixel *)_src;
534  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
535  pixel *dst = (pixel *)_dst;
536  ptrdiff_t dststride = _dststride / sizeof(pixel);
537 
538  int shift = 14 + 1 - BIT_DEPTH;
539 #if BIT_DEPTH < 14
540  int offset = 1 << (shift - 1);
541 #else
542  int offset = 0;
543 #endif
544 
545  for (y = 0; y < height; y++) {
546  for (x = 0; x < width; x++)
547  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
548  src += srcstride;
549  dst += dststride;
550  src2 += MAX_PB_SIZE;
551  }
552 }
553 
554 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
555  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
556 {
557  int x, y;
558  const pixel *src = (const pixel *)_src;
559  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
560  pixel *dst = (pixel *)_dst;
561  ptrdiff_t dststride = _dststride / sizeof(pixel);
562  int shift = denom + 14 - BIT_DEPTH;
563 #if BIT_DEPTH < 14
564  int offset = 1 << (shift - 1);
565 #else
566  int offset = 0;
567 #endif
568 
569  ox = ox * (1 << (BIT_DEPTH - 8));
570  for (y = 0; y < height; y++) {
571  for (x = 0; x < width; x++)
572  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
573  src += srcstride;
574  dst += dststride;
575  }
576 }
577 
578 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
579  const int16_t *src2,
580  int height, int denom, int wx0, int wx1,
581  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
582 {
583  int x, y;
584  const pixel *src = (const pixel *)_src;
585  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
586  pixel *dst = (pixel *)_dst;
587  ptrdiff_t dststride = _dststride / sizeof(pixel);
588 
589  int shift = 14 + 1 - BIT_DEPTH;
590  int log2Wd = denom + shift - 1;
591 
592  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
593  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
594  for (y = 0; y < height; y++) {
595  for (x = 0; x < width; x++) {
596  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
597  }
598  src += srcstride;
599  dst += dststride;
600  src2 += MAX_PB_SIZE;
601  }
602 }
603 
604 ////////////////////////////////////////////////////////////////////////////////
605 //
606 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter around position x: taps are taken at x - 3 * stride up to
 * x + 4 * stride and weighted with filter[0..7].
 *
 * `filter` and `x` are NOT parameters; they are read from the scope in
 * which the macro is expanded.
 *
 * Both parameters are parenthesized so that compound arguments (for
 * example a stride written as `a + b`, or a source written as `buf + n`)
 * are evaluated as a unit instead of being split by operator precedence.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x                ] +                                    \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
616 
617 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
618  const uint8_t *_src, ptrdiff_t _srcstride,
619  int height, intptr_t mx, intptr_t my, int width)
620 {
621  int x, y;
622  const pixel *src = (const pixel*)_src;
623  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
624  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
625  for (y = 0; y < height; y++) {
626  for (x = 0; x < width; x++)
627  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
628  src += srcstride;
629  dst += MAX_PB_SIZE;
630  }
631 }
632 
633 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
634  const uint8_t *_src, ptrdiff_t _srcstride,
635  int height, intptr_t mx, intptr_t my, int width)
636 {
637  int x, y;
638  const pixel *src = (const pixel*)_src;
639  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
640  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
641  for (y = 0; y < height; y++) {
642  for (x = 0; x < width; x++)
643  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
644  src += srcstride;
645  dst += MAX_PB_SIZE;
646  }
647 }
648 
649 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
650  const uint8_t *_src,
651  ptrdiff_t _srcstride,
652  int height, intptr_t mx,
653  intptr_t my, int width)
654 {
655  int x, y;
656  const int8_t *filter;
657  const pixel *src = (const pixel*)_src;
658  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
659  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
660  int16_t *tmp = tmp_array;
661 
662  src -= QPEL_EXTRA_BEFORE * srcstride;
663  filter = ff_hevc_qpel_filters[mx - 1];
664  for (y = 0; y < height + QPEL_EXTRA; y++) {
665  for (x = 0; x < width; x++)
666  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
667  src += srcstride;
668  tmp += MAX_PB_SIZE;
669  }
670 
671  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
672  filter = ff_hevc_qpel_filters[my - 1];
673  for (y = 0; y < height; y++) {
674  for (x = 0; x < width; x++)
675  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
676  tmp += MAX_PB_SIZE;
677  dst += MAX_PB_SIZE;
678  }
679 }
680 
681 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
682  const uint8_t *_src, ptrdiff_t _srcstride,
683  int height, intptr_t mx, intptr_t my, int width)
684 {
685  int x, y;
686  const pixel *src = (const pixel*)_src;
687  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
688  pixel *dst = (pixel *)_dst;
689  ptrdiff_t dststride = _dststride / sizeof(pixel);
690  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
691  int shift = 14 - BIT_DEPTH;
692 
693 #if BIT_DEPTH < 14
694  int offset = 1 << (shift - 1);
695 #else
696  int offset = 0;
697 #endif
698 
699  for (y = 0; y < height; y++) {
700  for (x = 0; x < width; x++)
701  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
702  src += srcstride;
703  dst += dststride;
704  }
705 }
706 
707 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
708  const int16_t *src2,
709  int height, intptr_t mx, intptr_t my, int width)
710 {
711  int x, y;
712  const pixel *src = (const pixel*)_src;
713  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
714  pixel *dst = (pixel *)_dst;
715  ptrdiff_t dststride = _dststride / sizeof(pixel);
716 
717  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
718 
719  int shift = 14 + 1 - BIT_DEPTH;
720 #if BIT_DEPTH < 14
721  int offset = 1 << (shift - 1);
722 #else
723  int offset = 0;
724 #endif
725 
726  for (y = 0; y < height; y++) {
727  for (x = 0; x < width; x++)
728  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
729  src += srcstride;
730  dst += dststride;
731  src2 += MAX_PB_SIZE;
732  }
733 }
734 
735 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
736  const uint8_t *_src, ptrdiff_t _srcstride,
737  int height, intptr_t mx, intptr_t my, int width)
738 {
739  int x, y;
740  const pixel *src = (const pixel*)_src;
741  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
742  pixel *dst = (pixel *)_dst;
743  ptrdiff_t dststride = _dststride / sizeof(pixel);
744  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
745  int shift = 14 - BIT_DEPTH;
746 
747 #if BIT_DEPTH < 14
748  int offset = 1 << (shift - 1);
749 #else
750  int offset = 0;
751 #endif
752 
753  for (y = 0; y < height; y++) {
754  for (x = 0; x < width; x++)
755  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
756  src += srcstride;
757  dst += dststride;
758  }
759 }
760 
761 
762 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
763  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
764  int height, intptr_t mx, intptr_t my, int width)
765 {
766  int x, y;
767  const pixel *src = (const pixel*)_src;
768  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
769  pixel *dst = (pixel *)_dst;
770  ptrdiff_t dststride = _dststride / sizeof(pixel);
771 
772  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
773 
774  int shift = 14 + 1 - BIT_DEPTH;
775 #if BIT_DEPTH < 14
776  int offset = 1 << (shift - 1);
777 #else
778  int offset = 0;
779 #endif
780 
781  for (y = 0; y < height; y++) {
782  for (x = 0; x < width; x++)
783  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
784  src += srcstride;
785  dst += dststride;
786  src2 += MAX_PB_SIZE;
787  }
788 }
789 
790 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
791  const uint8_t *_src, ptrdiff_t _srcstride,
792  int height, intptr_t mx, intptr_t my, int width)
793 {
794  int x, y;
795  const int8_t *filter;
796  const pixel *src = (const pixel*)_src;
797  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
798  pixel *dst = (pixel *)_dst;
799  ptrdiff_t dststride = _dststride / sizeof(pixel);
800  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
801  int16_t *tmp = tmp_array;
802  int shift = 14 - BIT_DEPTH;
803 
804 #if BIT_DEPTH < 14
805  int offset = 1 << (shift - 1);
806 #else
807  int offset = 0;
808 #endif
809 
810  src -= QPEL_EXTRA_BEFORE * srcstride;
811  filter = ff_hevc_qpel_filters[mx - 1];
812  for (y = 0; y < height + QPEL_EXTRA; y++) {
813  for (x = 0; x < width; x++)
814  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
815  src += srcstride;
816  tmp += MAX_PB_SIZE;
817  }
818 
819  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
820  filter = ff_hevc_qpel_filters[my - 1];
821 
822  for (y = 0; y < height; y++) {
823  for (x = 0; x < width; x++)
824  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
825  tmp += MAX_PB_SIZE;
826  dst += dststride;
827  }
828 }
829 
830 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
831  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
832  int height, intptr_t mx, intptr_t my, int width)
833 {
834  int x, y;
835  const int8_t *filter;
836  const pixel *src = (const pixel*)_src;
837  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
838  pixel *dst = (pixel *)_dst;
839  ptrdiff_t dststride = _dststride / sizeof(pixel);
840  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
841  int16_t *tmp = tmp_array;
842  int shift = 14 + 1 - BIT_DEPTH;
843 #if BIT_DEPTH < 14
844  int offset = 1 << (shift - 1);
845 #else
846  int offset = 0;
847 #endif
848 
849  src -= QPEL_EXTRA_BEFORE * srcstride;
850  filter = ff_hevc_qpel_filters[mx - 1];
851  for (y = 0; y < height + QPEL_EXTRA; y++) {
852  for (x = 0; x < width; x++)
853  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
854  src += srcstride;
855  tmp += MAX_PB_SIZE;
856  }
857 
858  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
859  filter = ff_hevc_qpel_filters[my - 1];
860 
861  for (y = 0; y < height; y++) {
862  for (x = 0; x < width; x++)
863  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
864  tmp += MAX_PB_SIZE;
865  dst += dststride;
866  src2 += MAX_PB_SIZE;
867  }
868 }
869 
870 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
871  const uint8_t *_src, ptrdiff_t _srcstride,
872  int height, int denom, int wx, int ox,
873  intptr_t mx, intptr_t my, int width)
874 {
875  int x, y;
876  const pixel *src = (const pixel*)_src;
877  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
878  pixel *dst = (pixel *)_dst;
879  ptrdiff_t dststride = _dststride / sizeof(pixel);
880  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
881  int shift = denom + 14 - BIT_DEPTH;
882 #if BIT_DEPTH < 14
883  int offset = 1 << (shift - 1);
884 #else
885  int offset = 0;
886 #endif
887 
888  ox = ox * (1 << (BIT_DEPTH - 8));
889  for (y = 0; y < height; y++) {
890  for (x = 0; x < width; x++)
891  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
892  src += srcstride;
893  dst += dststride;
894  }
895 }
896 
897 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
898  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
899  int height, int denom, int wx0, int wx1,
900  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
901 {
902  int x, y;
903  const pixel *src = (const pixel*)_src;
904  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
905  pixel *dst = (pixel *)_dst;
906  ptrdiff_t dststride = _dststride / sizeof(pixel);
907 
908  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
909 
910  int shift = 14 + 1 - BIT_DEPTH;
911  int log2Wd = denom + shift - 1;
912 
913  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
914  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
915  for (y = 0; y < height; y++) {
916  for (x = 0; x < width; x++)
917  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
918  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
919  src += srcstride;
920  dst += dststride;
921  src2 += MAX_PB_SIZE;
922  }
923 }
924 
925 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
926  const uint8_t *_src, ptrdiff_t _srcstride,
927  int height, int denom, int wx, int ox,
928  intptr_t mx, intptr_t my, int width)
929 {
930  int x, y;
931  const pixel *src = (const pixel*)_src;
932  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
933  pixel *dst = (pixel *)_dst;
934  ptrdiff_t dststride = _dststride / sizeof(pixel);
935  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
936  int shift = denom + 14 - BIT_DEPTH;
937 #if BIT_DEPTH < 14
938  int offset = 1 << (shift - 1);
939 #else
940  int offset = 0;
941 #endif
942 
943  ox = ox * (1 << (BIT_DEPTH - 8));
944  for (y = 0; y < height; y++) {
945  for (x = 0; x < width; x++)
946  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
947  src += srcstride;
948  dst += dststride;
949  }
950 }
951 
952 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
953  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
954  int height, int denom, int wx0, int wx1,
955  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
956 {
957  int x, y;
958  const pixel *src = (const pixel*)_src;
959  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
960  pixel *dst = (pixel *)_dst;
961  ptrdiff_t dststride = _dststride / sizeof(pixel);
962 
963  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
964 
965  int shift = 14 + 1 - BIT_DEPTH;
966  int log2Wd = denom + shift - 1;
967 
968  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
969  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
970  for (y = 0; y < height; y++) {
971  for (x = 0; x < width; x++)
972  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
973  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
974  src += srcstride;
975  dst += dststride;
976  src2 += MAX_PB_SIZE;
977  }
978 }
979 
980 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
981  const uint8_t *_src, ptrdiff_t _srcstride,
982  int height, int denom, int wx, int ox,
983  intptr_t mx, intptr_t my, int width)
984 {
985  int x, y;
986  const int8_t *filter;
987  const pixel *src = (const pixel*)_src;
988  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
989  pixel *dst = (pixel *)_dst;
990  ptrdiff_t dststride = _dststride / sizeof(pixel);
991  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
992  int16_t *tmp = tmp_array;
993  int shift = denom + 14 - BIT_DEPTH;
994 #if BIT_DEPTH < 14
995  int offset = 1 << (shift - 1);
996 #else
997  int offset = 0;
998 #endif
999 
1000  src -= QPEL_EXTRA_BEFORE * srcstride;
1001  filter = ff_hevc_qpel_filters[mx - 1];
1002  for (y = 0; y < height + QPEL_EXTRA; y++) {
1003  for (x = 0; x < width; x++)
1004  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1005  src += srcstride;
1006  tmp += MAX_PB_SIZE;
1007  }
1008 
1009  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1010  filter = ff_hevc_qpel_filters[my - 1];
1011 
1012  ox = ox * (1 << (BIT_DEPTH - 8));
1013  for (y = 0; y < height; y++) {
1014  for (x = 0; x < width; x++)
1015  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1016  tmp += MAX_PB_SIZE;
1017  dst += dststride;
1018  }
1019 }
1020 
1021 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1022  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1023  int height, int denom, int wx0, int wx1,
1024  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1025 {
1026  int x, y;
1027  const int8_t *filter;
1028  const pixel *src = (const pixel*)_src;
1029  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1030  pixel *dst = (pixel *)_dst;
1031  ptrdiff_t dststride = _dststride / sizeof(pixel);
1032  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1033  int16_t *tmp = tmp_array;
1034  int shift = 14 + 1 - BIT_DEPTH;
1035  int log2Wd = denom + shift - 1;
1036 
1037  src -= QPEL_EXTRA_BEFORE * srcstride;
1038  filter = ff_hevc_qpel_filters[mx - 1];
1039  for (y = 0; y < height + QPEL_EXTRA; y++) {
1040  for (x = 0; x < width; x++)
1041  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1042  src += srcstride;
1043  tmp += MAX_PB_SIZE;
1044  }
1045 
1046  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1047  filter = ff_hevc_qpel_filters[my - 1];
1048 
1049  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1050  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1051  for (y = 0; y < height; y++) {
1052  for (x = 0; x < width; x++)
1053  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1054  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1055  tmp += MAX_PB_SIZE;
1056  dst += dststride;
1057  src2 += MAX_PB_SIZE;
1058  }
1059 }
1060 
////////////////////////////////////////////////////////////////////////////////
// EPEL (4-tap) interpolation
////////////////////////////////////////////////////////////////////////////////
/*
 * Evaluate the 4-tap filter at index x of src, with consecutive taps `stride`
 * elements apart: one tap before x, one at x and two after it.
 *
 * The macro reads two names from the scope in which it is expanded:
 *   filter - the four coefficients
 *   x      - the current sample index
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * (e.g. `buf + 1` or `step + 1`) can be passed without precedence surprises.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - (stride)] +                                         \
     filter[1] * (src)[x] +                                                    \
     filter[2] * (src)[x + (stride)] +                                         \
     filter[3] * (src)[x + 2 * (stride)])
1069 
1070 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1071  const uint8_t *_src, ptrdiff_t _srcstride,
1072  int height, intptr_t mx, intptr_t my, int width)
1073 {
1074  int x, y;
1075  const pixel *src = (const pixel *)_src;
1076  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1077  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1078  for (y = 0; y < height; y++) {
1079  for (x = 0; x < width; x++)
1080  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1081  src += srcstride;
1082  dst += MAX_PB_SIZE;
1083  }
1084 }
1085 
1086 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1087  const uint8_t *_src, ptrdiff_t _srcstride,
1088  int height, intptr_t mx, intptr_t my, int width)
1089 {
1090  int x, y;
1091  const pixel *src = (const pixel *)_src;
1092  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1093  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1094 
1095  for (y = 0; y < height; y++) {
1096  for (x = 0; x < width; x++)
1097  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1098  src += srcstride;
1099  dst += MAX_PB_SIZE;
1100  }
1101 }
1102 
1103 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1104  const uint8_t *_src, ptrdiff_t _srcstride,
1105  int height, intptr_t mx, intptr_t my, int width)
1106 {
1107  int x, y;
1108  const pixel *src = (const pixel *)_src;
1109  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1110  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1111  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1112  int16_t *tmp = tmp_array;
1113 
1114  src -= EPEL_EXTRA_BEFORE * srcstride;
1115 
1116  for (y = 0; y < height + EPEL_EXTRA; y++) {
1117  for (x = 0; x < width; x++)
1118  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1119  src += srcstride;
1120  tmp += MAX_PB_SIZE;
1121  }
1122 
1123  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1124  filter = ff_hevc_epel_filters[my - 1];
1125 
1126  for (y = 0; y < height; y++) {
1127  for (x = 0; x < width; x++)
1128  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1129  tmp += MAX_PB_SIZE;
1130  dst += MAX_PB_SIZE;
1131  }
1132 }
1133 
1134 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1135  int height, intptr_t mx, intptr_t my, int width)
1136 {
1137  int x, y;
1138  const pixel *src = (const pixel *)_src;
1139  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1140  pixel *dst = (pixel *)_dst;
1141  ptrdiff_t dststride = _dststride / sizeof(pixel);
1142  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1143  int shift = 14 - BIT_DEPTH;
1144 #if BIT_DEPTH < 14
1145  int offset = 1 << (shift - 1);
1146 #else
1147  int offset = 0;
1148 #endif
1149 
1150  for (y = 0; y < height; y++) {
1151  for (x = 0; x < width; x++)
1152  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1153  src += srcstride;
1154  dst += dststride;
1155  }
1156 }
1157 
1158 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride,
1159  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1160  int height, intptr_t mx, intptr_t my, int width)
1161 {
1162  int x, y;
1163  const pixel *src = (const pixel *)_src;
1164  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1165  pixel *dst = (pixel *)_dst;
1166  ptrdiff_t dststride = _dststride / sizeof(pixel);
1167  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1168  int shift = 14 + 1 - BIT_DEPTH;
1169 #if BIT_DEPTH < 14
1170  int offset = 1 << (shift - 1);
1171 #else
1172  int offset = 0;
1173 #endif
1174 
1175  for (y = 0; y < height; y++) {
1176  for (x = 0; x < width; x++) {
1177  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1178  }
1179  dst += dststride;
1180  src += srcstride;
1181  src2 += MAX_PB_SIZE;
1182  }
1183 }
1184 
1185 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1186  int height, intptr_t mx, intptr_t my, int width)
1187 {
1188  int x, y;
1189  const pixel *src = (const pixel *)_src;
1190  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1191  pixel *dst = (pixel *)_dst;
1192  ptrdiff_t dststride = _dststride / sizeof(pixel);
1193  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1194  int shift = 14 - BIT_DEPTH;
1195 #if BIT_DEPTH < 14
1196  int offset = 1 << (shift - 1);
1197 #else
1198  int offset = 0;
1199 #endif
1200 
1201  for (y = 0; y < height; y++) {
1202  for (x = 0; x < width; x++)
1203  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1204  src += srcstride;
1205  dst += dststride;
1206  }
1207 }
1208 
1209 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride,
1210  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1211  int height, intptr_t mx, intptr_t my, int width)
1212 {
1213  int x, y;
1214  const pixel *src = (const pixel *)_src;
1215  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1216  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1217  pixel *dst = (pixel *)_dst;
1218  ptrdiff_t dststride = _dststride / sizeof(pixel);
1219  int shift = 14 + 1 - BIT_DEPTH;
1220 #if BIT_DEPTH < 14
1221  int offset = 1 << (shift - 1);
1222 #else
1223  int offset = 0;
1224 #endif
1225 
1226  for (y = 0; y < height; y++) {
1227  for (x = 0; x < width; x++)
1228  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1229  dst += dststride;
1230  src += srcstride;
1231  src2 += MAX_PB_SIZE;
1232  }
1233 }
1234 
1235 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1236  int height, intptr_t mx, intptr_t my, int width)
1237 {
1238  int x, y;
1239  const pixel *src = (const pixel *)_src;
1240  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1241  pixel *dst = (pixel *)_dst;
1242  ptrdiff_t dststride = _dststride / sizeof(pixel);
1243  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1244  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1245  int16_t *tmp = tmp_array;
1246  int shift = 14 - BIT_DEPTH;
1247 #if BIT_DEPTH < 14
1248  int offset = 1 << (shift - 1);
1249 #else
1250  int offset = 0;
1251 #endif
1252 
1253  src -= EPEL_EXTRA_BEFORE * srcstride;
1254 
1255  for (y = 0; y < height + EPEL_EXTRA; y++) {
1256  for (x = 0; x < width; x++)
1257  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1258  src += srcstride;
1259  tmp += MAX_PB_SIZE;
1260  }
1261 
1262  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1263  filter = ff_hevc_epel_filters[my - 1];
1264 
1265  for (y = 0; y < height; y++) {
1266  for (x = 0; x < width; x++)
1267  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1268  tmp += MAX_PB_SIZE;
1269  dst += dststride;
1270  }
1271 }
1272 
1273 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1274  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1275  int height, intptr_t mx, intptr_t my, int width)
1276 {
1277  int x, y;
1278  const pixel *src = (const pixel *)_src;
1279  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1280  pixel *dst = (pixel *)_dst;
1281  ptrdiff_t dststride = _dststride / sizeof(pixel);
1282  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1283  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1284  int16_t *tmp = tmp_array;
1285  int shift = 14 + 1 - BIT_DEPTH;
1286 #if BIT_DEPTH < 14
1287  int offset = 1 << (shift - 1);
1288 #else
1289  int offset = 0;
1290 #endif
1291 
1292  src -= EPEL_EXTRA_BEFORE * srcstride;
1293 
1294  for (y = 0; y < height + EPEL_EXTRA; y++) {
1295  for (x = 0; x < width; x++)
1296  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1297  src += srcstride;
1298  tmp += MAX_PB_SIZE;
1299  }
1300 
1301  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1302  filter = ff_hevc_epel_filters[my - 1];
1303 
1304  for (y = 0; y < height; y++) {
1305  for (x = 0; x < width; x++)
1306  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1307  tmp += MAX_PB_SIZE;
1308  dst += dststride;
1309  src2 += MAX_PB_SIZE;
1310  }
1311 }
1312 
1313 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1314  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1315 {
1316  int x, y;
1317  const pixel *src = (const pixel *)_src;
1318  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1319  pixel *dst = (pixel *)_dst;
1320  ptrdiff_t dststride = _dststride / sizeof(pixel);
1321  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1322  int shift = denom + 14 - BIT_DEPTH;
1323 #if BIT_DEPTH < 14
1324  int offset = 1 << (shift - 1);
1325 #else
1326  int offset = 0;
1327 #endif
1328 
1329  ox = ox * (1 << (BIT_DEPTH - 8));
1330  for (y = 0; y < height; y++) {
1331  for (x = 0; x < width; x++) {
1332  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1333  }
1334  dst += dststride;
1335  src += srcstride;
1336  }
1337 }
1338 
1339 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
1340  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1341  int height, int denom, int wx0, int wx1,
1342  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1343 {
1344  int x, y;
1345  const pixel *src = (const pixel *)_src;
1346  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1347  pixel *dst = (pixel *)_dst;
1348  ptrdiff_t dststride = _dststride / sizeof(pixel);
1349  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1350  int shift = 14 + 1 - BIT_DEPTH;
1351  int log2Wd = denom + shift - 1;
1352 
1353  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1354  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1355  for (y = 0; y < height; y++) {
1356  for (x = 0; x < width; x++)
1357  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1358  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1359  src += srcstride;
1360  dst += dststride;
1361  src2 += MAX_PB_SIZE;
1362  }
1363 }
1364 
1365 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1366  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1367 {
1368  int x, y;
1369  const pixel *src = (const pixel *)_src;
1370  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1371  pixel *dst = (pixel *)_dst;
1372  ptrdiff_t dststride = _dststride / sizeof(pixel);
1373  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1374  int shift = denom + 14 - BIT_DEPTH;
1375 #if BIT_DEPTH < 14
1376  int offset = 1 << (shift - 1);
1377 #else
1378  int offset = 0;
1379 #endif
1380 
1381  ox = ox * (1 << (BIT_DEPTH - 8));
1382  for (y = 0; y < height; y++) {
1383  for (x = 0; x < width; x++) {
1384  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1385  }
1386  dst += dststride;
1387  src += srcstride;
1388  }
1389 }
1390 
1391 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
1392  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1393  int height, int denom, int wx0, int wx1,
1394  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1395 {
1396  int x, y;
1397  const pixel *src = (const pixel *)_src;
1398  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1399  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1400  pixel *dst = (pixel *)_dst;
1401  ptrdiff_t dststride = _dststride / sizeof(pixel);
1402  int shift = 14 + 1 - BIT_DEPTH;
1403  int log2Wd = denom + shift - 1;
1404 
1405  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1406  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1407  for (y = 0; y < height; y++) {
1408  for (x = 0; x < width; x++)
1409  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1410  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1411  src += srcstride;
1412  dst += dststride;
1413  src2 += MAX_PB_SIZE;
1414  }
1415 }
1416 
1417 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride,
1418  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1419 {
1420  int x, y;
1421  const pixel *src = (const pixel *)_src;
1422  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1423  pixel *dst = (pixel *)_dst;
1424  ptrdiff_t dststride = _dststride / sizeof(pixel);
1425  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1426  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1427  int16_t *tmp = tmp_array;
1428  int shift = denom + 14 - BIT_DEPTH;
1429 #if BIT_DEPTH < 14
1430  int offset = 1 << (shift - 1);
1431 #else
1432  int offset = 0;
1433 #endif
1434 
1435  src -= EPEL_EXTRA_BEFORE * srcstride;
1436 
1437  for (y = 0; y < height + EPEL_EXTRA; y++) {
1438  for (x = 0; x < width; x++)
1439  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1440  src += srcstride;
1441  tmp += MAX_PB_SIZE;
1442  }
1443 
1444  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1445  filter = ff_hevc_epel_filters[my - 1];
1446 
1447  ox = ox * (1 << (BIT_DEPTH - 8));
1448  for (y = 0; y < height; y++) {
1449  for (x = 0; x < width; x++)
1450  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1451  tmp += MAX_PB_SIZE;
1452  dst += dststride;
1453  }
1454 }
1455 
1456 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1457  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
1458  int height, int denom, int wx0, int wx1,
1459  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1460 {
1461  int x, y;
1462  const pixel *src = (const pixel *)_src;
1463  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1464  pixel *dst = (pixel *)_dst;
1465  ptrdiff_t dststride = _dststride / sizeof(pixel);
1466  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1467  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1468  int16_t *tmp = tmp_array;
1469  int shift = 14 + 1 - BIT_DEPTH;
1470  int log2Wd = denom + shift - 1;
1471 
1472  src -= EPEL_EXTRA_BEFORE * srcstride;
1473 
1474  for (y = 0; y < height + EPEL_EXTRA; y++) {
1475  for (x = 0; x < width; x++)
1476  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1477  src += srcstride;
1478  tmp += MAX_PB_SIZE;
1479  }
1480 
1481  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1482  filter = ff_hevc_epel_filters[my - 1];
1483 
1484  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1485  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1486  for (y = 0; y < height; y++) {
1487  for (x = 0; x < width; x++)
1488  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1489  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1490  tmp += MAX_PB_SIZE;
1491  dst += dststride;
1492  src2 += MAX_PB_SIZE;
1493  }
1494 }
1495 
// Sample accessors for the loop filters. They expand to plain array
// expressions and rely on `pix`, `xstride` and `ystride` being in scope.
//
// Current line (line zero): P0..P3 sit 1..4 xstride steps before pix,
// Q0..Q3 sit 0..3 xstride steps from pix.
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-1 * xstride]
#define Q0 pix[0 * xstride]
#define Q1 pix[1 * xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// Same samples three ystride steps further on (line three); only read when
// deciding how to deblock, never written.
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-1 * xstride + 3 * ystride]
#define TQ0 pix[0 * xstride + 3 * ystride]
#define TQ1 pix[1 * xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
1515 
1516 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
1517  ptrdiff_t _xstride, ptrdiff_t _ystride,
1518  int beta, const int *_tc,
1519  const uint8_t *_no_p, const uint8_t *_no_q)
1520 {
1521  int d, j;
1522  pixel *pix = (pixel *)_pix;
1523  ptrdiff_t xstride = _xstride / sizeof(pixel);
1524  ptrdiff_t ystride = _ystride / sizeof(pixel);
1525 
1526  beta <<= BIT_DEPTH - 8;
1527 
1528  for (j = 0; j < 2; j++) {
1529  const int dp0 = abs(P2 - 2 * P1 + P0);
1530  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1531  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1532  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1533  const int d0 = dp0 + dq0;
1534  const int d3 = dp3 + dq3;
1535  const int tc = _tc[j] << (BIT_DEPTH - 8);
1536  const int no_p = _no_p[j];
1537  const int no_q = _no_q[j];
1538 
1539  if (d0 + d3 >= beta) {
1540  pix += 4 * ystride;
1541  continue;
1542  } else {
1543  const int beta_3 = beta >> 3;
1544  const int beta_2 = beta >> 2;
1545  const int tc25 = ((tc * 5 + 1) >> 1);
1546 
1547  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1548  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1549  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1550  // strong filtering
1551  const int tc2 = tc << 1;
1552  for (d = 0; d < 4; d++) {
1553  const int p3 = P3;
1554  const int p2 = P2;
1555  const int p1 = P1;
1556  const int p0 = P0;
1557  const int q0 = Q0;
1558  const int q1 = Q1;
1559  const int q2 = Q2;
1560  const int q3 = Q3;
1561  if (!no_p) {
1562  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1563  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1564  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1565  }
1566  if (!no_q) {
1567  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1568  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1569  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1570  }
1571  pix += ystride;
1572  }
1573  } else { // normal filtering
1574  int nd_p = 1;
1575  int nd_q = 1;
1576  const int tc_2 = tc >> 1;
1577  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1578  nd_p = 2;
1579  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1580  nd_q = 2;
1581 
1582  for (d = 0; d < 4; d++) {
1583  const int p2 = P2;
1584  const int p1 = P1;
1585  const int p0 = P0;
1586  const int q0 = Q0;
1587  const int q1 = Q1;
1588  const int q2 = Q2;
1589  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1590  if (abs(delta0) < 10 * tc) {
1591  delta0 = av_clip(delta0, -tc, tc);
1592  if (!no_p)
1593  P0 = av_clip_pixel(p0 + delta0);
1594  if (!no_q)
1595  Q0 = av_clip_pixel(q0 - delta0);
1596  if (!no_p && nd_p > 1) {
1597  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1598  P1 = av_clip_pixel(p1 + deltap1);
1599  }
1600  if (!no_q && nd_q > 1) {
1601  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1602  Q1 = av_clip_pixel(q1 + deltaq1);
1603  }
1604  }
1605  pix += ystride;
1606  }
1607  }
1608  }
1609  }
1610 }
1611 
1612 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1613  ptrdiff_t _ystride, const int *_tc,
1614  const uint8_t *_no_p, const uint8_t *_no_q)
1615 {
1616  int d, j, no_p, no_q;
1617  pixel *pix = (pixel *)_pix;
1618  ptrdiff_t xstride = _xstride / sizeof(pixel);
1619  ptrdiff_t ystride = _ystride / sizeof(pixel);
1620 
1621  for (j = 0; j < 2; j++) {
1622  const int tc = _tc[j] << (BIT_DEPTH - 8);
1623  if (tc <= 0) {
1624  pix += 4 * ystride;
1625  continue;
1626  }
1627  no_p = _no_p[j];
1628  no_q = _no_q[j];
1629 
1630  for (d = 0; d < 4; d++) {
1631  int delta0;
1632  const int p1 = P1;
1633  const int p0 = P0;
1634  const int q0 = Q0;
1635  const int q1 = Q1;
1636  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1637  if (!no_p)
1638  P0 = av_clip_pixel(p0 + delta0);
1639  if (!no_q)
1640  Q0 = av_clip_pixel(q0 - delta0);
1641  pix += ystride;
1642  }
1643  }
1644 }
1645 
1646 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1647  const int32_t *tc, const uint8_t *no_p,
1648  const uint8_t *no_q)
1649 {
1650  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1651 }
1652 
1653 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1654  const int32_t *tc, const uint8_t *no_p,
1655  const uint8_t *no_q)
1656 {
1657  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1658 }
1659 
1660 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1661  int beta, const int32_t *tc, const uint8_t *no_p,
1662  const uint8_t *no_q)
1663 {
1664  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1665  beta, tc, no_p, no_q);
1666 }
1667 
1668 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1669  int beta, const int32_t *tc, const uint8_t *no_p,
1670  const uint8_t *no_q)
1671 {
1672  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1673  beta, tc, no_p, no_q);
1674 }
1675 
// Drop the loop-filter sample accessors so they do not leak past this point.

// line-zero accessors
#undef P0
#undef P1
#undef P2
#undef P3
#undef Q0
#undef Q1
#undef Q2
#undef Q3

// line-three accessors
#undef TP0
#undef TP1
#undef TP2
#undef TP3
#undef TQ0
#undef TQ1
#undef TQ2
#undef TQ3
dequant
static void FUNC() dequant(int16_t *coeffs, int16_t log2_size)
Definition: hevcdsp_template.c:107
put_hevc_epel_h
static void FUNC() put_hevc_epel_h(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1070
TP2
#define TP2
Definition: hevcdsp_template.c:1508
TP3
#define TP3
Definition: hevcdsp_template.c:1507
q1
static const uint8_t q1[256]
Definition: twofish.c:100
hevc_h_loop_filter_luma
static void FUNC() hevc_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q)
Definition: hevcdsp_template.c:1660
av_clip
#define av_clip
Definition: common.h:96
put_hevc_epel_bi_v
static void FUNC() put_hevc_epel_bi_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1209
SAO_EO_45D
@ SAO_EO_45D
Definition: hevcdec.h:220
BIT_DEPTH
#define BIT_DEPTH
Definition: bit_depth_template.c:24
P3
#define P3
Definition: hevcdsp_template.c:1497
put_hevc_qpel_bi_hv
static void FUNC() put_hevc_qpel_bi_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:830
TR_4x4_LUMA
#define TR_4x4_LUMA(dst, src, step, assign)
Definition: hevcdsp_template.c:134
put_hevc_qpel_bi_v
static void FUNC() put_hevc_qpel_bi_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:762
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
sao_edge_filter
static void FUNC() sao_edge_filter(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
Definition: hevcdsp_template.c:324
put_hevc_qpel_bi_w_h
static void FUNC() put_hevc_qpel_bi_w_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:897
ff_hevc_epel_filters
const int8_t ff_hevc_epel_filters[7][4]
Definition: hevcdsp.c:94
put_hevc_qpel_bi_w_v
static void FUNC() put_hevc_qpel_bi_w_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:952
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
put_hevc_epel_uni_w_v
static void FUNC() put_hevc_epel_uni_w_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1365
Q1
#define Q1
Definition: hevcdsp_template.c:1502
put_pcm
static void FUNC() put_pcm(uint8_t *_dst, ptrdiff_t stride, int width, int height, GetBitContext *gb, int pcm_bit_depth)
Definition: hevcdsp_template.c:29
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:335
put_hevc_epel_uni_w_h
static void FUNC() put_hevc_epel_uni_w_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1313
SAO_EO_135D
@ SAO_EO_135D
Definition: hevcdec.h:219
sao_edge_restore_0
static void FUNC() sao_edge_restore_0(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const SAOParams *sao, const int *borders, int _width, int _height, int c_idx, const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge)
Definition: hevcdsp_template.c:355
GetBitContext
Definition: get_bits.h:108
put_hevc_qpel_v
static void FUNC() put_hevc_qpel_v(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:633
hevc_v_loop_filter_luma
static void FUNC() hevc_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q)
Definition: hevcdsp_template.c:1668
hevc_loop_filter_chroma
static void FUNC() hevc_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
Definition: hevcdsp_template.c:1612
put_hevc_qpel_h
static void FUNC() put_hevc_qpel_h(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:617
put_hevc_pel_bi_pixels
static void FUNC() put_hevc_pel_bi_pixels(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:528
sao_edge_restore_1
static void FUNC() sao_edge_restore_1(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const SAOParams *sao, const int *borders, int _width, int _height, int c_idx, const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge)
Definition: hevcdsp_template.c:405
hevc_v_loop_filter_chroma
static void FUNC() hevc_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q)
Definition: hevcdsp_template.c:1653
put_hevc_qpel_uni_w_h
static void FUNC() put_hevc_qpel_uni_w_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:870
width
#define width
QPEL_EXTRA_BEFORE
#define QPEL_EXTRA_BEFORE
Definition: hevcdec.h:63
get_bits.h
put_hevc_qpel_uni_v
static void FUNC() put_hevc_qpel_uni_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:735
q0
static const uint8_t q0[256]
Definition: twofish.c:81
hevcdsp.h
FUNC
static void FUNC(ff_hevc_idct_4x4, BIT_DEPTH)
Definition: hevcdsp_template.c:21
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
ff_hevc_qpel_filters
const int8_t ff_hevc_qpel_filters[3][16]
Definition: hevcdsp.c:104
hevc_h_loop_filter_chroma
static void FUNC() hevc_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q)
Definition: hevcdsp_template.c:1646
put_hevc_epel_uni_v
static void FUNC() put_hevc_epel_uni_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1185
TP1
#define TP1
Definition: hevcdsp_template.c:1509
bit_depth_template.c
abs
#define abs(x)
Definition: cuda_runtime.h:35
add_residual4x4
static void FUNC() add_residual4x4(uint8_t *_dst, const int16_t *res, ptrdiff_t stride)
Definition: hevcdsp_template.c:61
IDCT_DC
#define IDCT_DC(H)
Definition: hevcdsp_template.c:265
Q2
#define Q2
Definition: hevcdsp_template.c:1503
TP0
#define TP0
Definition: hevcdsp_template.c:1510
CMP
#define CMP(a, b)
Definition: hevcdsp_template.c:322
put_hevc_qpel_uni_hv
static void FUNC() put_hevc_qpel_uni_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:790
QPEL_FILTER
#define QPEL_FILTER(src, stride)
Definition: hevcdsp_template.c:607
hevcdec.h
TQ3
#define TQ3
Definition: hevcdsp_template.c:1514
add_residual8x8
static void FUNC() add_residual8x8(uint8_t *_dst, const int16_t *res, ptrdiff_t stride)
Definition: hevcdsp_template.c:67
shift
static int shift(int a, int b)
Definition: bonk.c:262
put_hevc_qpel_uni_w_hv
static void FUNC() put_hevc_qpel_uni_w_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:980
size
int size
Definition: twinvq_data.h:10344
P2
#define P2
Definition: hevcdsp_template.c:1498
put_hevc_qpel_uni_w_v
static void FUNC() put_hevc_qpel_uni_w_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:925
QPEL_EXTRA
#define QPEL_EXTRA
Definition: hevcdec.h:65
hevc_loop_filter_luma
static void FUNC() hevc_loop_filter_luma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
Definition: hevcdsp_template.c:1516
put_hevc_qpel_uni_h
static void FUNC() put_hevc_qpel_uni_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:681
height
#define height
TQ0
#define TQ0
Definition: hevcdsp_template.c:1511
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
sao_band_filter
static void FUNC() sao_band_filter(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
Definition: hevcdsp_template.c:298
put_hevc_qpel_bi_w_hv
static void FUNC() put_hevc_qpel_bi_w_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1021
add_residual16x16
static void FUNC() add_residual16x16(uint8_t *_dst, const int16_t *res, ptrdiff_t stride)
Definition: hevcdsp_template.c:73
Q3
#define Q3
Definition: hevcdsp_template.c:1504
SCALE
#define SCALE(dst, x)
Definition: hevcdsp_template.c:132
offset_table
static const uint8_t offset_table[]
Definition: escape130.c:42
EPEL_EXTRA_BEFORE
#define EPEL_EXTRA_BEFORE
Definition: hevcdec.h:60
put_hevc_pel_pixels
static void FUNC() put_hevc_pel_pixels(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:496
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
put_hevc_epel_uni_w_hv
static void FUNC() put_hevc_epel_uni_w_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1417
MAX_PB_SIZE
#define MAX_PB_SIZE
Definition: hevcdsp.h:32
src2
const pixel * src2
Definition: h264pred_template.c:422
put_hevc_qpel_bi_h
static void FUNC() put_hevc_qpel_bi_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:707
av_always_inline
#define av_always_inline
Definition: attributes.h:49
EPEL_FILTER
#define EPEL_FILTER(src, stride)
Definition: hevcdsp_template.c:1064
SAO_EO_HORIZ
@ SAO_EO_HORIZ
Definition: hevcdec.h:217
put_hevc_epel_bi_w_v
static void FUNC() put_hevc_epel_bi_w_v(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1391
SAOParams
Definition: hevcdsp.h:34
stride
#define stride
Definition: h264pred_template.c:537
transform_4x4_luma
static void FUNC() transform_4x4_luma(int16_t *coeffs)
Definition: hevcdsp_template.c:149
put_hevc_pel_uni_pixels
static void FUNC() put_hevc_pel_uni_pixels(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:512
P1
#define P1
Definition: hevcdsp_template.c:1499
av_clip_pixel
#define av_clip_pixel(a)
Definition: bit_depth_template.c:98
pos
unsigned int pos
Definition: spdifenc.c:413
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
put_hevc_epel_uni_h
static void FUNC() put_hevc_epel_uni_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1134
put_hevc_epel_hv
static void FUNC() put_hevc_epel_hv(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1103
Q0
#define Q0
Definition: hevcdsp_template.c:1501
put_hevc_epel_bi_w_h
static void FUNC() put_hevc_epel_bi_w_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1339
mode
mode
Definition: ebur128.h:83
SAO_EO_VERT
@ SAO_EO_VERT
Definition: hevcdec.h:218
IDCT
#define IDCT(H)
Definition: hevcdsp_template.c:240
put_hevc_qpel_hv
static void FUNC() put_hevc_qpel_hv(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:649
tc
#define tc
Definition: regdef.h:69
put_hevc_pel_uni_w_pixels
static void FUNC() put_hevc_pel_uni_w_pixels(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:554
EPEL_EXTRA
#define EPEL_EXTRA
Definition: hevcdec.h:62
add_residual
static av_always_inline void FUNC() add_residual(uint8_t *_dst, const int16_t *res, ptrdiff_t stride, int size)
Definition: hevcdsp_template.c:44
add_residual32x32
static void FUNC() add_residual32x32(uint8_t *_dst, const int16_t *res, ptrdiff_t stride)
Definition: hevcdsp_template.c:79
put_hevc_epel_bi_h
static void FUNC() put_hevc_epel_bi_h(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1158
SAOParams::offset_val
int16_t offset_val[3][5]
SaoOffsetVal.
Definition: hevcdsp.h:42
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
transform_rdpcm
static void FUNC() transform_rdpcm(int16_t *_coeffs, int16_t log2_size, int mode)
Definition: hevcdsp_template.c:85
put_hevc_pel_bi_w_pixels
static void FUNC() put_hevc_pel_bi_w_pixels(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:578
d
d
Definition: ffmpeg_filter.c:368
int32_t
int32_t
Definition: audioconvert.c:56
put_hevc_epel_bi_w_hv
static void FUNC() put_hevc_epel_bi_w_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1456
put_hevc_epel_v
static void FUNC() put_hevc_epel_v(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1086
put_hevc_epel_uni_hv
static void FUNC() put_hevc_epel_uni_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1235
TQ1
#define TQ1
Definition: hevcdsp_template.c:1512
P0
#define P0
Definition: hevcdsp_template.c:1500
TQ2
#define TQ2
Definition: hevcdsp_template.c:1513
put_hevc_epel_bi_hv
static void FUNC() put_hevc_epel_bi_hv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_template.c:1273