FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
31  GetBitContext *gb, int pcm_bit_depth)
32 {
33  int x, y;
34  pixel *dst = (pixel *)_dst;
35 
36  stride /= sizeof(pixel);
37 
38  for (y = 0; y < height; y++) {
39  for (x = 0; x < width; x++)
40  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
41  dst += stride;
42  }
43 }
44 
45 static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
46  ptrdiff_t stride)
47 {
48  int x, y;
49  pixel *dst = (pixel *)_dst;
50 
51  stride /= sizeof(pixel);
52 
53  for (y = 0; y < 4; y++) {
54  for (x = 0; x < 4; x++) {
55  dst[x] = av_clip_pixel(dst[x] + *coeffs);
56  coeffs++;
57  }
58  dst += stride;
59  }
60 }
61 
62 static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
63  ptrdiff_t stride)
64 {
65  int x, y;
66  pixel *dst = (pixel *)_dst;
67 
68  stride /= sizeof(pixel);
69 
70  for (y = 0; y < 8; y++) {
71  for (x = 0; x < 8; x++) {
72  dst[x] = av_clip_pixel(dst[x] + *coeffs);
73  coeffs++;
74  }
75  dst += stride;
76  }
77 }
78 
79 static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
80  ptrdiff_t stride)
81 {
82  int x, y;
83  pixel *dst = (pixel *)_dst;
84 
85  stride /= sizeof(pixel);
86 
87  for (y = 0; y < 16; y++) {
88  for (x = 0; x < 16; x++) {
89  dst[x] = av_clip_pixel(dst[x] + *coeffs);
90  coeffs++;
91  }
92  dst += stride;
93  }
94 }
95 
96 static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
97  ptrdiff_t stride)
98 {
99  int x, y;
100  pixel *dst = (pixel *)_dst;
101 
102  stride /= sizeof(pixel);
103 
104  for (y = 0; y < 32; y++) {
105  for (x = 0; x < 32; x++) {
106  dst[x] = av_clip_pixel(dst[x] + *coeffs);
107  coeffs++;
108  }
109  dst += stride;
110  }
111 }
112 
113 
114 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
115 {
116  int16_t *coeffs = (int16_t *) _coeffs;
117  int x, y;
118  int size = 1 << log2_size;
119 
120  if (mode) {
121  coeffs += size;
122  for (y = 0; y < size - 1; y++) {
123  for (x = 0; x < size; x++)
124  coeffs[x] += coeffs[x - size];
125  coeffs += size;
126  }
127  } else {
128  for (y = 0; y < size; y++) {
129  for (x = 1; x < size; x++)
130  coeffs[x] += coeffs[x - 1];
131  coeffs += size;
132  }
133  }
134 }
135 
136 static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
137 {
138  int shift = 15 - BIT_DEPTH - log2_size;
139  int x, y;
140  int size = 1 << log2_size;
141  int16_t *coeffs = _coeffs;
142 
143 
144  if (shift > 0) {
145  int offset = 1 << (shift - 1);
146  for (y = 0; y < size; y++) {
147  for (x = 0; x < size; x++) {
148  *coeffs = (*coeffs + offset) >> shift;
149  coeffs++;
150  }
151  }
152  } else {
153  for (y = 0; y < size; y++) {
154  for (x = 0; x < size; x++) {
155  *coeffs = *coeffs << -shift;
156  coeffs++;
157  }
158  }
159  }
160 }
161 
/*
 * Store policies handed to the TR_* macros as their "assign" argument.
 *
 * SET            plain store of x into dst.
 * SCALE          adds the variable "add" and shifts right by the variable
 *                "shift" (both taken from the scope of the expansion),
 *                clamps with av_clip_int16() and stores the result.
 * ADD_AND_SCALE  same scaling, then adds the result to the current value
 *                of dst and clamps with av_clip_pixel(). Not referenced in
 *                the visible part of this file.
 */
#define SET(dst, x)   (dst) = (x)
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
#define ADD_AND_SCALE(dst, x)                                           \
    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
166 
/*
 * One 4-point pass of the transform used by transform_4x4_luma().
 *
 * Reads src[0], src[step], src[2 * step], src[3 * step] and hands the four
 * results to assign(dst[k * step], value). All inputs are loaded before
 * the first store, so dst and src may name the same array.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        const int in0    = src[0 * step];                               \
        const int in1    = src[1 * step];                               \
        const int in2    = src[2 * step];                               \
        const int in3    = src[3 * step];                               \
        const int sum02  = in0 + in2;                                   \
        const int sum23  = in2 + in3;                                   \
        const int diff03 = in0 - in3;                                   \
        const int mid74  = 74 * in1;                                    \
                                                                        \
        assign(dst[0 * step], 29 * sum02  + 55 * sum23  + mid74);       \
        assign(dst[1 * step], 55 * diff03 - 29 * sum23  + mid74);       \
        assign(dst[2 * step], 74 * (in0 - in2 + in3));                  \
        assign(dst[3 * step], 55 * sum02  + 29 * diff03 - mid74);       \
    } while (0)
181 
182 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
183 {
184  int i;
185  int shift = 7;
186  int add = 1 << (shift - 1);
187  int16_t *src = coeffs;
188 
189  for (i = 0; i < 4; i++) {
190  TR_4x4_LUMA(src, src, 4, SCALE);
191  src++;
192  }
193 
194  shift = 20 - BIT_DEPTH;
195  add = 1 << (shift - 1);
196  for (i = 0; i < 4; i++) {
197  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
198  coeffs += 4;
199  }
200 }
201 
202 #undef TR_4x4_LUMA
203 
/*
 * 4-point butterfly.
 *
 * Reads src[k * sstep] for k = 0..3 and hands the four results to
 * assign(dst[k * dstep], value). The inputs are loaded before the first
 * store, so dst and src may name the same array. The "end" argument is
 * accepted for symmetry with the larger TR_* macros and is not used.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                       \
    do {                                                                \
        const int in0 = src[0 * sstep];                                 \
        const int in1 = src[1 * sstep];                                 \
        const int in2 = src[2 * sstep];                                 \
        const int in3 = src[3 * sstep];                                 \
        const int ev0 = 64 * in0 + 64 * in2;                            \
        const int ev1 = 64 * in0 - 64 * in2;                            \
        const int od0 = 83 * in1 + 36 * in3;                            \
        const int od1 = 36 * in1 - 83 * in3;                            \
                                                                        \
        assign(dst[0 * dstep], ev0 + od0);                              \
        assign(dst[1 * dstep], ev1 + od1);                              \
        assign(dst[2 * dstep], ev1 - od1);                              \
        assign(dst[3 * dstep], ev0 - od0);                              \
    } while (0)
216 
/*
 * 8-point transform pass.
 *
 * The odd part sums transform[4 * j][i] * src[j * sstep] over the odd j
 * below "end"; the even part is produced by TR_4 on every second input.
 * Both halves are complete before the first assign(), so dst and src may
 * name the same array. "transform" is a table defined outside this macro.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                       \
    do {                                                                \
        int k8, m8;                                                     \
        int even8[4];                                                   \
        int odd8[4];                                                    \
                                                                        \
        for (k8 = 0; k8 < 4; k8++) {                                    \
            int acc8 = 0;                                               \
            for (m8 = 1; m8 < end; m8 += 2)                             \
                acc8 += transform[4 * m8][k8] * src[m8 * sstep];        \
            odd8[k8] = acc8;                                            \
        }                                                               \
        TR_4(even8, src, 1, 2 * sstep, SET, 4);                         \
                                                                        \
        for (k8 = 0; k8 < 4; k8++) {                                    \
            assign(dst[k8 * dstep],       even8[k8] + odd8[k8]);        \
            assign(dst[(7 - k8) * dstep], even8[k8] - odd8[k8]);        \
        }                                                               \
    } while (0)
232 
/*
 * 16-point transform pass.
 *
 * The odd part sums transform[2 * j][i] * src[j * sstep] over the odd j
 * below "end"; the even part is produced by TR_8 (always with end = 8) on
 * every second input. Both halves are complete before the first assign(),
 * so dst and src may name the same array. "transform" is a table defined
 * outside this macro.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                      \
    do {                                                                \
        int k16, m16;                                                   \
        int even16[8];                                                  \
        int odd16[8];                                                   \
                                                                        \
        for (k16 = 0; k16 < 8; k16++) {                                 \
            int acc16 = 0;                                              \
            for (m16 = 1; m16 < end; m16 += 2)                          \
                acc16 += transform[2 * m16][k16] * src[m16 * sstep];    \
            odd16[k16] = acc16;                                         \
        }                                                               \
        TR_8(even16, src, 1, 2 * sstep, SET, 8);                        \
                                                                        \
        for (k16 = 0; k16 < 8; k16++) {                                 \
            assign(dst[k16 * dstep],        even16[k16] + odd16[k16]);  \
            assign(dst[(15 - k16) * dstep], even16[k16] - odd16[k16]);  \
        }                                                               \
    } while (0)
248 
/*
 * 32-point transform pass.
 *
 * The odd part sums transform[j][i] * src[j * sstep] over the odd j below
 * "end"; the even part is produced by TR_16 on every second input, with
 * its own limit set to end/2. Both halves are complete before the first
 * assign(), so dst and src may name the same array. "transform" is a
 * table defined outside this macro.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                      \
    do {                                                                \
        int k32, m32;                                                   \
        int even32[16];                                                 \
        int odd32[16];                                                  \
                                                                        \
        for (k32 = 0; k32 < 16; k32++) {                                \
            int acc32 = 0;                                              \
            for (m32 = 1; m32 < end; m32 += 2)                          \
                acc32 += transform[m32][k32] * src[m32 * sstep];        \
            odd32[k32] = acc32;                                         \
        }                                                               \
        TR_16(even32, src, 1, 2 * sstep, SET, end/2);                   \
                                                                        \
        for (k32 = 0; k32 < 16; k32++) {                                \
            assign(dst[k32 * dstep],        even32[k32] + odd32[k32]);  \
            assign(dst[(31 - k32) * dstep], even32[k32] - odd32[k32]);  \
        }                                                               \
    } while (0)
264 
/*
 * Local declarations pasted into IDCT(H); they read the col_limit
 * parameter of the generated function.
 *
 * The 4-point variant declares only limit2, the larger sizes declare both
 * limit and limit2. No trailing semicolon: IDCT(H) supplies it.
 */
#define IDCT_VAR4(H)                                                    \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR8(H)                                                    \
    int limit  = FFMIN(col_limit, H);                                   \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR16(H)                                                   \
    int limit  = FFMIN(col_limit, H);                                   \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR32(H)                                                   \
    int limit  = FFMIN(col_limit, H);                                   \
    int limit2 = FFMIN(col_limit + 4, H)
272 
/*
 * Generates idct_HxH(coeffs, col_limit): an in-place two-pass transform of
 * an H x H coefficient block built on TR_H.
 *
 * Pass 1 walks the H columns (element spacing H) with shift 7 and limit2
 * as the TR_H "end" argument; after every fourth column except column 0,
 * limit2 is lowered by 4 while it is still below H.
 * Pass 2 walks the H rows (element spacing 1) with shift 20 - BIT_DEPTH
 * and "limit" as the "end" argument.
 * SCALE reads the locals "add" and "shift"; limit/limit2 come from
 * IDCT_VAR<H>.
 */
#define IDCT(H)                                                         \
static void FUNC(idct_ ## H ## x ## H)(int16_t *coeffs, int col_limit)  \
{                                                                       \
    int16_t *src = coeffs;                                              \
    int shift    = 7;                                                   \
    int add      = 1 << (shift - 1);                                    \
    int i;                                                              \
    IDCT_VAR ## H(H);                                                   \
                                                                        \
    for (i = 0; i < H; i++, src++) {                                    \
        TR_ ## H(src, src, H, H, SCALE, limit2);                        \
        if (i != 0 && (i % 4) == 0 && limit2 < H)                       \
            limit2 -= 4;                                                \
    }                                                                   \
                                                                        \
    shift = 20 - BIT_DEPTH;                                             \
    add   = 1 << (shift - 1);                                           \
    for (i = 0; i < H; i++, coeffs += H)                                \
        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);                   \
}
296 
/*
 * Generates idct_HxH_dc(coeffs): fills all H * H entries of the block
 * with a single value derived from coeffs[0],
 *     (((coeffs[0] + 1) >> 1) + add) >> shift
 * where shift = 14 - BIT_DEPTH and add = 1 << (shift - 1).
 */
#define IDCT_DC(H)                                                      \
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)          \
{                                                                       \
    const int shift = 14 - BIT_DEPTH;                                   \
    const int add   = 1 << (shift - 1);                                 \
    const int dc    = (((coeffs[0] + 1) >> 1) + add) >> shift;          \
    int n;                                                              \
                                                                        \
    for (n = 0; n < H * H; n++)                                         \
        coeffs[n] = dc;                                                 \
}
311 
312 IDCT( 4)
313 IDCT( 8)
314 IDCT(16)
315 IDCT(32)
316 
317 IDCT_DC( 4)
318 IDCT_DC( 8)
319 IDCT_DC(16)
320 IDCT_DC(32)
321 
322 #undef TR_4
323 #undef TR_8
324 #undef TR_16
325 #undef TR_32
326 
327 #undef SET
328 #undef SCALE
329 #undef ADD_AND_SCALE
330 
331 static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
332  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
333  int *borders, int width, int height,
334  int c_idx)
335 {
336  pixel *dst = (pixel *)_dst;
337  pixel *src = (pixel *)_src;
338  int offset_table[32] = { 0 };
339  int k, y, x;
340  int shift = BIT_DEPTH - 5;
341  int16_t *sao_offset_val = sao->offset_val[c_idx];
342  int sao_left_class = sao->band_position[c_idx];
343 
344  stride_dst /= sizeof(pixel);
345  stride_src /= sizeof(pixel);
346 
347  for (k = 0; k < 4; k++)
348  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
349  for (y = 0; y < height; y++) {
350  for (x = 0; x < width; x++)
351  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
352  dst += stride_dst;
353  src += stride_src;
354  }
355 }
356 
/* Three-way comparison: 1 when a > b, 0 when they are equal, -1 when a < b. */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
358 
359 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
360  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
361  int width, int height,
362  int c_idx, int init_x, int init_y) {
363 
364  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
365  static const int8_t pos[4][2][2] = {
366  { { -1, 0 }, { 1, 0 } }, // horizontal
367  { { 0, -1 }, { 0, 1 } }, // vertical
368  { { -1, -1 }, { 1, 1 } }, // 45 degree
369  { { 1, -1 }, { -1, 1 } }, // 135 degree
370  };
371  int16_t *sao_offset_val = sao->offset_val[c_idx];
372  int sao_eo_class = sao->eo_class[c_idx];
373  pixel *dst = (pixel *)_dst;
374  pixel *src = (pixel *)_src;
375 
376  int y_stride_src = init_y * stride_src;
377  int y_stride_dst = init_y * stride_dst;
378  int pos_0_0 = pos[sao_eo_class][0][0];
379  int pos_0_1 = pos[sao_eo_class][0][1];
380  int pos_1_0 = pos[sao_eo_class][1][0];
381  int pos_1_1 = pos[sao_eo_class][1][1];
382  int x, y;
383 
384  int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
385  int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
386  for (y = init_y; y < height; y++) {
387  for (x = init_x; x < width; x++) {
388  int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
389  int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
390  int offset_val = edge_idx[2 + diff0 + diff1];
391  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
392  }
393  y_stride_src += stride_src;
394  y_stride_dst += stride_dst;
395  y_stride_0_1 += stride_src;
396  y_stride_1_1 += stride_src;
397  }
398 }
399 
400 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
401  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
402  int *borders, int _width, int _height,
403  int c_idx, uint8_t *vert_edge,
404  uint8_t *horiz_edge, uint8_t *diag_edge)
405 {
406  int x, y;
407  pixel *dst = (pixel *)_dst;
408  pixel *src = (pixel *)_src;
409  int16_t *sao_offset_val = sao->offset_val[c_idx];
410  int sao_eo_class = sao->eo_class[c_idx];
411  int init_x = 0, init_y = 0, width = _width, height = _height;
412 
413  stride_dst /= sizeof(pixel);
414  stride_src /= sizeof(pixel);
415 
416  if (sao_eo_class != SAO_EO_VERT) {
417  if (borders[0]) {
418  int offset_val = sao_offset_val[0];
419  for (y = 0; y < height; y++) {
420  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
421  }
422  init_x = 1;
423  }
424  if (borders[2]) {
425  int offset_val = sao_offset_val[0];
426  int offset = width - 1;
427  for (x = 0; x < height; x++) {
428  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
429  }
430  width--;
431  }
432  }
433  if (sao_eo_class != SAO_EO_HORIZ) {
434  if (borders[1]) {
435  int offset_val = sao_offset_val[0];
436  for (x = init_x; x < width; x++)
437  dst[x] = av_clip_pixel(src[x] + offset_val);
438  init_y = 1;
439  }
440  if (borders[3]) {
441  int offset_val = sao_offset_val[0];
442  int y_stride_dst = stride_dst * (height - 1);
443  int y_stride_src = stride_src * (height - 1);
444  for (x = init_x; x < width; x++)
445  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
446  height--;
447  }
448  }
449 
450  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
451 }
452 
453 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
454  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
455  int *borders, int _width, int _height,
456  int c_idx, uint8_t *vert_edge,
457  uint8_t *horiz_edge, uint8_t *diag_edge)
458 {
459  int x, y;
460  pixel *dst = (pixel *)_dst;
461  pixel *src = (pixel *)_src;
462  int16_t *sao_offset_val = sao->offset_val[c_idx];
463  int sao_eo_class = sao->eo_class[c_idx];
464  int init_x = 0, init_y = 0, width = _width, height = _height;
465 
466  stride_dst /= sizeof(pixel);
467  stride_src /= sizeof(pixel);
468 
469  if (sao_eo_class != SAO_EO_VERT) {
470  if (borders[0]) {
471  int offset_val = sao_offset_val[0];
472  for (y = 0; y < height; y++) {
473  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
474  }
475  init_x = 1;
476  }
477  if (borders[2]) {
478  int offset_val = sao_offset_val[0];
479  int offset = width - 1;
480  for (x = 0; x < height; x++) {
481  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
482  }
483  width--;
484  }
485  }
486  if (sao_eo_class != SAO_EO_HORIZ) {
487  if (borders[1]) {
488  int offset_val = sao_offset_val[0];
489  for (x = init_x; x < width; x++)
490  dst[x] = av_clip_pixel(src[x] + offset_val);
491  init_y = 1;
492  }
493  if (borders[3]) {
494  int offset_val = sao_offset_val[0];
495  int y_stride_dst = stride_dst * (height - 1);
496  int y_stride_src = stride_src * (height - 1);
497  for (x = init_x; x < width; x++)
498  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
499  height--;
500  }
501  }
502 
503  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
504 
505  {
506  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
507  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
508  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
509  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
510 
511  // Restore pixels that can't be modified
512  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
513  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
514  dst[y*stride_dst] = src[y*stride_src];
515  }
516  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
517  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
518  dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
519  }
520 
521  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
522  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
523  dst[x] = src[x];
524  }
525  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
526  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
527  dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
528  }
529  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
530  dst[0] = src[0];
531  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
532  dst[width-1] = src[width-1];
533  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
534  dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
535  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
536  dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
537 
538  }
539 }
540 
541 #undef CMP
542 
543 ////////////////////////////////////////////////////////////////////////////////
544 //
545 ////////////////////////////////////////////////////////////////////////////////
546 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
547  uint8_t *_src, ptrdiff_t _srcstride,
548  int height, intptr_t mx, intptr_t my, int width)
549 {
550  int x, y;
551  pixel *src = (pixel *)_src;
552  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
553 
554  for (y = 0; y < height; y++) {
555  for (x = 0; x < width; x++)
556  dst[x] = src[x] << (14 - BIT_DEPTH);
557  src += srcstride;
558  dst += MAX_PB_SIZE;
559  }
560 }
561 
562 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
563  int height, intptr_t mx, intptr_t my, int width)
564 {
565  int y;
566  pixel *src = (pixel *)_src;
567  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
568  pixel *dst = (pixel *)_dst;
569  ptrdiff_t dststride = _dststride / sizeof(pixel);
570 
571  for (y = 0; y < height; y++) {
572  memcpy(dst, src, width * sizeof(pixel));
573  src += srcstride;
574  dst += dststride;
575  }
576 }
577 
578 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
579  int16_t *src2,
580  int height, intptr_t mx, intptr_t my, int width)
581 {
582  int x, y;
583  pixel *src = (pixel *)_src;
584  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
585  pixel *dst = (pixel *)_dst;
586  ptrdiff_t dststride = _dststride / sizeof(pixel);
587 
588  int shift = 14 + 1 - BIT_DEPTH;
589 #if BIT_DEPTH < 14
590  int offset = 1 << (shift - 1);
591 #else
592  int offset = 0;
593 #endif
594 
595  for (y = 0; y < height; y++) {
596  for (x = 0; x < width; x++)
597  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
598  src += srcstride;
599  dst += dststride;
600  src2 += MAX_PB_SIZE;
601  }
602 }
603 
604 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
605  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
606 {
607  int x, y;
608  pixel *src = (pixel *)_src;
609  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
610  pixel *dst = (pixel *)_dst;
611  ptrdiff_t dststride = _dststride / sizeof(pixel);
612  int shift = denom + 14 - BIT_DEPTH;
613 #if BIT_DEPTH < 14
614  int offset = 1 << (shift - 1);
615 #else
616  int offset = 0;
617 #endif
618 
619  ox = ox * (1 << (BIT_DEPTH - 8));
620  for (y = 0; y < height; y++) {
621  for (x = 0; x < width; x++)
622  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
623  src += srcstride;
624  dst += dststride;
625  }
626 }
627 
628 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
629  int16_t *src2,
630  int height, int denom, int wx0, int wx1,
631  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
632 {
633  int x, y;
634  pixel *src = (pixel *)_src;
635  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
636  pixel *dst = (pixel *)_dst;
637  ptrdiff_t dststride = _dststride / sizeof(pixel);
638 
639  int shift = 14 + 1 - BIT_DEPTH;
640  int log2Wd = denom + shift - 1;
641 
642  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
643  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
644  for (y = 0; y < height; y++) {
645  for (x = 0; x < width; x++) {
646  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
647  }
648  src += srcstride;
649  dst += dststride;
650  src2 += MAX_PB_SIZE;
651  }
652 }
653 
654 ////////////////////////////////////////////////////////////////////////////////
655 //
656 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter around position x of src.
 *
 * Multiplies filter[0..7] with the samples at x - 3 * stride through
 * x + 4 * stride and sums the products. "filter" and "x" are taken from
 * the scope of the expansion. Both macro arguments are parenthesised so
 * that expression arguments (e.g. a sum as stride) expand correctly.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - 3 * (stride)] +                                     \
     filter[1] * (src)[x - 2 * (stride)] +                                     \
     filter[2] * (src)[x -     (stride)] +                                     \
     filter[3] * (src)[x                ] +                                    \
     filter[4] * (src)[x +     (stride)] +                                     \
     filter[5] * (src)[x + 2 * (stride)] +                                     \
     filter[6] * (src)[x + 3 * (stride)] +                                     \
     filter[7] * (src)[x + 4 * (stride)])
666 
667 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
668  uint8_t *_src, ptrdiff_t _srcstride,
669  int height, intptr_t mx, intptr_t my, int width)
670 {
671  int x, y;
672  pixel *src = (pixel*)_src;
673  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
674  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
675  for (y = 0; y < height; y++) {
676  for (x = 0; x < width; x++)
677  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
678  src += srcstride;
679  dst += MAX_PB_SIZE;
680  }
681 }
682 
683 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
684  uint8_t *_src, ptrdiff_t _srcstride,
685  int height, intptr_t mx, intptr_t my, int width)
686 {
687  int x, y;
688  pixel *src = (pixel*)_src;
689  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
690  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
691  for (y = 0; y < height; y++) {
692  for (x = 0; x < width; x++)
693  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
694  src += srcstride;
695  dst += MAX_PB_SIZE;
696  }
697 }
698 
699 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
700  uint8_t *_src,
701  ptrdiff_t _srcstride,
702  int height, intptr_t mx,
703  intptr_t my, int width)
704 {
705  int x, y;
706  const int8_t *filter;
707  pixel *src = (pixel*)_src;
708  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
709  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
710  int16_t *tmp = tmp_array;
711 
712  src -= QPEL_EXTRA_BEFORE * srcstride;
713  filter = ff_hevc_qpel_filters[mx - 1];
714  for (y = 0; y < height + QPEL_EXTRA; y++) {
715  for (x = 0; x < width; x++)
716  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
717  src += srcstride;
718  tmp += MAX_PB_SIZE;
719  }
720 
721  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
722  filter = ff_hevc_qpel_filters[my - 1];
723  for (y = 0; y < height; y++) {
724  for (x = 0; x < width; x++)
725  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
726  tmp += MAX_PB_SIZE;
727  dst += MAX_PB_SIZE;
728  }
729 }
730 
731 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
732  uint8_t *_src, ptrdiff_t _srcstride,
733  int height, intptr_t mx, intptr_t my, int width)
734 {
735  int x, y;
736  pixel *src = (pixel*)_src;
737  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
738  pixel *dst = (pixel *)_dst;
739  ptrdiff_t dststride = _dststride / sizeof(pixel);
740  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
741  int shift = 14 - BIT_DEPTH;
742 
743 #if BIT_DEPTH < 14
744  int offset = 1 << (shift - 1);
745 #else
746  int offset = 0;
747 #endif
748 
749  for (y = 0; y < height; y++) {
750  for (x = 0; x < width; x++)
751  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
752  src += srcstride;
753  dst += dststride;
754  }
755 }
756 
757 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
758  int16_t *src2,
759  int height, intptr_t mx, intptr_t my, int width)
760 {
761  int x, y;
762  pixel *src = (pixel*)_src;
763  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
764  pixel *dst = (pixel *)_dst;
765  ptrdiff_t dststride = _dststride / sizeof(pixel);
766 
767  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
768 
769  int shift = 14 + 1 - BIT_DEPTH;
770 #if BIT_DEPTH < 14
771  int offset = 1 << (shift - 1);
772 #else
773  int offset = 0;
774 #endif
775 
776  for (y = 0; y < height; y++) {
777  for (x = 0; x < width; x++)
778  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
779  src += srcstride;
780  dst += dststride;
781  src2 += MAX_PB_SIZE;
782  }
783 }
784 
785 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
786  uint8_t *_src, ptrdiff_t _srcstride,
787  int height, intptr_t mx, intptr_t my, int width)
788 {
789  int x, y;
790  pixel *src = (pixel*)_src;
791  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
792  pixel *dst = (pixel *)_dst;
793  ptrdiff_t dststride = _dststride / sizeof(pixel);
794  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
795  int shift = 14 - BIT_DEPTH;
796 
797 #if BIT_DEPTH < 14
798  int offset = 1 << (shift - 1);
799 #else
800  int offset = 0;
801 #endif
802 
803  for (y = 0; y < height; y++) {
804  for (x = 0; x < width; x++)
805  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
806  src += srcstride;
807  dst += dststride;
808  }
809 }
810 
811 
812 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
813  int16_t *src2,
814  int height, intptr_t mx, intptr_t my, int width)
815 {
816  int x, y;
817  pixel *src = (pixel*)_src;
818  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
819  pixel *dst = (pixel *)_dst;
820  ptrdiff_t dststride = _dststride / sizeof(pixel);
821 
822  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
823 
824  int shift = 14 + 1 - BIT_DEPTH;
825 #if BIT_DEPTH < 14
826  int offset = 1 << (shift - 1);
827 #else
828  int offset = 0;
829 #endif
830 
831  for (y = 0; y < height; y++) {
832  for (x = 0; x < width; x++)
833  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
834  src += srcstride;
835  dst += dststride;
836  src2 += MAX_PB_SIZE;
837  }
838 }
839 
840 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
841  uint8_t *_src, ptrdiff_t _srcstride,
842  int height, intptr_t mx, intptr_t my, int width)
843 {
844  int x, y;
845  const int8_t *filter;
846  pixel *src = (pixel*)_src;
847  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
848  pixel *dst = (pixel *)_dst;
849  ptrdiff_t dststride = _dststride / sizeof(pixel);
850  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
851  int16_t *tmp = tmp_array;
852  int shift = 14 - BIT_DEPTH;
853 
854 #if BIT_DEPTH < 14
855  int offset = 1 << (shift - 1);
856 #else
857  int offset = 0;
858 #endif
859 
860  src -= QPEL_EXTRA_BEFORE * srcstride;
861  filter = ff_hevc_qpel_filters[mx - 1];
862  for (y = 0; y < height + QPEL_EXTRA; y++) {
863  for (x = 0; x < width; x++)
864  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
865  src += srcstride;
866  tmp += MAX_PB_SIZE;
867  }
868 
869  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
870  filter = ff_hevc_qpel_filters[my - 1];
871 
872  for (y = 0; y < height; y++) {
873  for (x = 0; x < width; x++)
874  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
875  tmp += MAX_PB_SIZE;
876  dst += dststride;
877  }
878 }
879 
880 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
881  int16_t *src2,
882  int height, intptr_t mx, intptr_t my, int width)
883 {
884  int x, y;
885  const int8_t *filter;
886  pixel *src = (pixel*)_src;
887  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
888  pixel *dst = (pixel *)_dst;
889  ptrdiff_t dststride = _dststride / sizeof(pixel);
890  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
891  int16_t *tmp = tmp_array;
892  int shift = 14 + 1 - BIT_DEPTH;
893 #if BIT_DEPTH < 14
894  int offset = 1 << (shift - 1);
895 #else
896  int offset = 0;
897 #endif
898 
899  src -= QPEL_EXTRA_BEFORE * srcstride;
900  filter = ff_hevc_qpel_filters[mx - 1];
901  for (y = 0; y < height + QPEL_EXTRA; y++) {
902  for (x = 0; x < width; x++)
903  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
904  src += srcstride;
905  tmp += MAX_PB_SIZE;
906  }
907 
908  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
909  filter = ff_hevc_qpel_filters[my - 1];
910 
911  for (y = 0; y < height; y++) {
912  for (x = 0; x < width; x++)
913  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
914  tmp += MAX_PB_SIZE;
915  dst += dststride;
916  src2 += MAX_PB_SIZE;
917  }
918 }
919 
920 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
921  uint8_t *_src, ptrdiff_t _srcstride,
922  int height, int denom, int wx, int ox,
923  intptr_t mx, intptr_t my, int width)
924 {
925  int x, y;
926  pixel *src = (pixel*)_src;
927  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
928  pixel *dst = (pixel *)_dst;
929  ptrdiff_t dststride = _dststride / sizeof(pixel);
930  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
931  int shift = denom + 14 - BIT_DEPTH;
932 #if BIT_DEPTH < 14
933  int offset = 1 << (shift - 1);
934 #else
935  int offset = 0;
936 #endif
937 
938  ox = ox * (1 << (BIT_DEPTH - 8));
939  for (y = 0; y < height; y++) {
940  for (x = 0; x < width; x++)
941  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
942  src += srcstride;
943  dst += dststride;
944  }
945 }
946 
947 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
948  int16_t *src2,
949  int height, int denom, int wx0, int wx1,
950  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
951 {
952  int x, y;
953  pixel *src = (pixel*)_src;
954  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
955  pixel *dst = (pixel *)_dst;
956  ptrdiff_t dststride = _dststride / sizeof(pixel);
957 
958  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
959 
960  int shift = 14 + 1 - BIT_DEPTH;
961  int log2Wd = denom + shift - 1;
962 
963  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
964  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
965  for (y = 0; y < height; y++) {
966  for (x = 0; x < width; x++)
967  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
968  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
969  src += srcstride;
970  dst += dststride;
971  src2 += MAX_PB_SIZE;
972  }
973 }
974 
975 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
976  uint8_t *_src, ptrdiff_t _srcstride,
977  int height, int denom, int wx, int ox,
978  intptr_t mx, intptr_t my, int width)
979 {
980  int x, y;
981  pixel *src = (pixel*)_src;
982  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
983  pixel *dst = (pixel *)_dst;
984  ptrdiff_t dststride = _dststride / sizeof(pixel);
985  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
986  int shift = denom + 14 - BIT_DEPTH;
987 #if BIT_DEPTH < 14
988  int offset = 1 << (shift - 1);
989 #else
990  int offset = 0;
991 #endif
992 
993  ox = ox * (1 << (BIT_DEPTH - 8));
994  for (y = 0; y < height; y++) {
995  for (x = 0; x < width; x++)
996  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
997  src += srcstride;
998  dst += dststride;
999  }
1000 }
1001 
1002 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1003  int16_t *src2,
1004  int height, int denom, int wx0, int wx1,
1005  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1006 {
1007  int x, y;
1008  pixel *src = (pixel*)_src;
1009  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1010  pixel *dst = (pixel *)_dst;
1011  ptrdiff_t dststride = _dststride / sizeof(pixel);
1012 
1013  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
1014 
1015  int shift = 14 + 1 - BIT_DEPTH;
1016  int log2Wd = denom + shift - 1;
1017 
1018  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1019  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1020  for (y = 0; y < height; y++) {
1021  for (x = 0; x < width; x++)
1022  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1023  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1024  src += srcstride;
1025  dst += dststride;
1026  src2 += MAX_PB_SIZE;
1027  }
1028 }
1029 
1030 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1031  uint8_t *_src, ptrdiff_t _srcstride,
1032  int height, int denom, int wx, int ox,
1033  intptr_t mx, intptr_t my, int width)
1034 {
1035  int x, y;
1036  const int8_t *filter;
1037  pixel *src = (pixel*)_src;
1038  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1039  pixel *dst = (pixel *)_dst;
1040  ptrdiff_t dststride = _dststride / sizeof(pixel);
1041  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1042  int16_t *tmp = tmp_array;
1043  int shift = denom + 14 - BIT_DEPTH;
1044 #if BIT_DEPTH < 14
1045  int offset = 1 << (shift - 1);
1046 #else
1047  int offset = 0;
1048 #endif
1049 
1050  src -= QPEL_EXTRA_BEFORE * srcstride;
1051  filter = ff_hevc_qpel_filters[mx - 1];
1052  for (y = 0; y < height + QPEL_EXTRA; y++) {
1053  for (x = 0; x < width; x++)
1054  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1055  src += srcstride;
1056  tmp += MAX_PB_SIZE;
1057  }
1058 
1059  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1060  filter = ff_hevc_qpel_filters[my - 1];
1061 
1062  ox = ox * (1 << (BIT_DEPTH - 8));
1063  for (y = 0; y < height; y++) {
1064  for (x = 0; x < width; x++)
1065  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1066  tmp += MAX_PB_SIZE;
1067  dst += dststride;
1068  }
1069 }
1070 
1071 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1072  int16_t *src2,
1073  int height, int denom, int wx0, int wx1,
1074  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1075 {
1076  int x, y;
1077  const int8_t *filter;
1078  pixel *src = (pixel*)_src;
1079  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1080  pixel *dst = (pixel *)_dst;
1081  ptrdiff_t dststride = _dststride / sizeof(pixel);
1082  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1083  int16_t *tmp = tmp_array;
1084  int shift = 14 + 1 - BIT_DEPTH;
1085  int log2Wd = denom + shift - 1;
1086 
1087  src -= QPEL_EXTRA_BEFORE * srcstride;
1088  filter = ff_hevc_qpel_filters[mx - 1];
1089  for (y = 0; y < height + QPEL_EXTRA; y++) {
1090  for (x = 0; x < width; x++)
1091  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1092  src += srcstride;
1093  tmp += MAX_PB_SIZE;
1094  }
1095 
1096  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1097  filter = ff_hevc_qpel_filters[my - 1];
1098 
1099  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1100  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1101  for (y = 0; y < height; y++) {
1102  for (x = 0; x < width; x++)
1103  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1104  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1105  tmp += MAX_PB_SIZE;
1106  dst += dststride;
1107  src2 += MAX_PB_SIZE;
1108  }
1109 }
1110 
1111 ////////////////////////////////////////////////////////////////////////////////
1112 //
1113 ////////////////////////////////////////////////////////////////////////////////
/*
 * 4-tap filter around src[x], sampling at x - stride, x, x + stride and
 * x + 2 * stride. Relies on `filter` (4 coefficients) and `x` being in scope
 * at the point of use. Arguments are parenthesised so that expressions can
 * be passed safely as src or stride.
 */
#define EPEL_FILTER(src, stride)                \
    (filter[0] * (src)[x - (stride)] +          \
     filter[1] * (src)[x] +                     \
     filter[2] * (src)[x + (stride)] +          \
     filter[3] * (src)[x + 2 * (stride)])
1119 
1120 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1121  uint8_t *_src, ptrdiff_t _srcstride,
1122  int height, intptr_t mx, intptr_t my, int width)
1123 {
1124  int x, y;
1125  pixel *src = (pixel *)_src;
1126  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1127  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1128  for (y = 0; y < height; y++) {
1129  for (x = 0; x < width; x++)
1130  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1131  src += srcstride;
1132  dst += MAX_PB_SIZE;
1133  }
1134 }
1135 
1136 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1137  uint8_t *_src, ptrdiff_t _srcstride,
1138  int height, intptr_t mx, intptr_t my, int width)
1139 {
1140  int x, y;
1141  pixel *src = (pixel *)_src;
1142  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1143  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1144 
1145  for (y = 0; y < height; y++) {
1146  for (x = 0; x < width; x++)
1147  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1148  src += srcstride;
1149  dst += MAX_PB_SIZE;
1150  }
1151 }
1152 
1153 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1154  uint8_t *_src, ptrdiff_t _srcstride,
1155  int height, intptr_t mx, intptr_t my, int width)
1156 {
1157  int x, y;
1158  pixel *src = (pixel *)_src;
1159  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1160  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1161  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1162  int16_t *tmp = tmp_array;
1163 
1164  src -= EPEL_EXTRA_BEFORE * srcstride;
1165 
1166  for (y = 0; y < height + EPEL_EXTRA; y++) {
1167  for (x = 0; x < width; x++)
1168  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1169  src += srcstride;
1170  tmp += MAX_PB_SIZE;
1171  }
1172 
1173  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1174  filter = ff_hevc_epel_filters[my - 1];
1175 
1176  for (y = 0; y < height; y++) {
1177  for (x = 0; x < width; x++)
1178  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1179  tmp += MAX_PB_SIZE;
1180  dst += MAX_PB_SIZE;
1181  }
1182 }
1183 
1184 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1185  int height, intptr_t mx, intptr_t my, int width)
1186 {
1187  int x, y;
1188  pixel *src = (pixel *)_src;
1189  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1190  pixel *dst = (pixel *)_dst;
1191  ptrdiff_t dststride = _dststride / sizeof(pixel);
1192  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1193  int shift = 14 - BIT_DEPTH;
1194 #if BIT_DEPTH < 14
1195  int offset = 1 << (shift - 1);
1196 #else
1197  int offset = 0;
1198 #endif
1199 
1200  for (y = 0; y < height; y++) {
1201  for (x = 0; x < width; x++)
1202  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1203  src += srcstride;
1204  dst += dststride;
1205  }
1206 }
1207 
1208 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1209  int16_t *src2,
1210  int height, intptr_t mx, intptr_t my, int width)
1211 {
1212  int x, y;
1213  pixel *src = (pixel *)_src;
1214  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1215  pixel *dst = (pixel *)_dst;
1216  ptrdiff_t dststride = _dststride / sizeof(pixel);
1217  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1218  int shift = 14 + 1 - BIT_DEPTH;
1219 #if BIT_DEPTH < 14
1220  int offset = 1 << (shift - 1);
1221 #else
1222  int offset = 0;
1223 #endif
1224 
1225  for (y = 0; y < height; y++) {
1226  for (x = 0; x < width; x++) {
1227  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1228  }
1229  dst += dststride;
1230  src += srcstride;
1231  src2 += MAX_PB_SIZE;
1232  }
1233 }
1234 
1235 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1236  int height, intptr_t mx, intptr_t my, int width)
1237 {
1238  int x, y;
1239  pixel *src = (pixel *)_src;
1240  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1241  pixel *dst = (pixel *)_dst;
1242  ptrdiff_t dststride = _dststride / sizeof(pixel);
1243  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1244  int shift = 14 - BIT_DEPTH;
1245 #if BIT_DEPTH < 14
1246  int offset = 1 << (shift - 1);
1247 #else
1248  int offset = 0;
1249 #endif
1250 
1251  for (y = 0; y < height; y++) {
1252  for (x = 0; x < width; x++)
1253  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1254  src += srcstride;
1255  dst += dststride;
1256  }
1257 }
1258 
1259 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1260  int16_t *src2,
1261  int height, intptr_t mx, intptr_t my, int width)
1262 {
1263  int x, y;
1264  pixel *src = (pixel *)_src;
1265  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1266  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1267  pixel *dst = (pixel *)_dst;
1268  ptrdiff_t dststride = _dststride / sizeof(pixel);
1269  int shift = 14 + 1 - BIT_DEPTH;
1270 #if BIT_DEPTH < 14
1271  int offset = 1 << (shift - 1);
1272 #else
1273  int offset = 0;
1274 #endif
1275 
1276  for (y = 0; y < height; y++) {
1277  for (x = 0; x < width; x++)
1278  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1279  dst += dststride;
1280  src += srcstride;
1281  src2 += MAX_PB_SIZE;
1282  }
1283 }
1284 
1285 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1286  int height, intptr_t mx, intptr_t my, int width)
1287 {
1288  int x, y;
1289  pixel *src = (pixel *)_src;
1290  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1291  pixel *dst = (pixel *)_dst;
1292  ptrdiff_t dststride = _dststride / sizeof(pixel);
1293  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1294  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1295  int16_t *tmp = tmp_array;
1296  int shift = 14 - BIT_DEPTH;
1297 #if BIT_DEPTH < 14
1298  int offset = 1 << (shift - 1);
1299 #else
1300  int offset = 0;
1301 #endif
1302 
1303  src -= EPEL_EXTRA_BEFORE * srcstride;
1304 
1305  for (y = 0; y < height + EPEL_EXTRA; y++) {
1306  for (x = 0; x < width; x++)
1307  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1308  src += srcstride;
1309  tmp += MAX_PB_SIZE;
1310  }
1311 
1312  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1313  filter = ff_hevc_epel_filters[my - 1];
1314 
1315  for (y = 0; y < height; y++) {
1316  for (x = 0; x < width; x++)
1317  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1318  tmp += MAX_PB_SIZE;
1319  dst += dststride;
1320  }
1321 }
1322 
1323 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1324  int16_t *src2,
1325  int height, intptr_t mx, intptr_t my, int width)
1326 {
1327  int x, y;
1328  pixel *src = (pixel *)_src;
1329  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1330  pixel *dst = (pixel *)_dst;
1331  ptrdiff_t dststride = _dststride / sizeof(pixel);
1332  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1333  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1334  int16_t *tmp = tmp_array;
1335  int shift = 14 + 1 - BIT_DEPTH;
1336 #if BIT_DEPTH < 14
1337  int offset = 1 << (shift - 1);
1338 #else
1339  int offset = 0;
1340 #endif
1341 
1342  src -= EPEL_EXTRA_BEFORE * srcstride;
1343 
1344  for (y = 0; y < height + EPEL_EXTRA; y++) {
1345  for (x = 0; x < width; x++)
1346  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1347  src += srcstride;
1348  tmp += MAX_PB_SIZE;
1349  }
1350 
1351  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1352  filter = ff_hevc_epel_filters[my - 1];
1353 
1354  for (y = 0; y < height; y++) {
1355  for (x = 0; x < width; x++)
1356  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1357  tmp += MAX_PB_SIZE;
1358  dst += dststride;
1359  src2 += MAX_PB_SIZE;
1360  }
1361 }
1362 
1363 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1364  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1365 {
1366  int x, y;
1367  pixel *src = (pixel *)_src;
1368  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1369  pixel *dst = (pixel *)_dst;
1370  ptrdiff_t dststride = _dststride / sizeof(pixel);
1371  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1372  int shift = denom + 14 - BIT_DEPTH;
1373 #if BIT_DEPTH < 14
1374  int offset = 1 << (shift - 1);
1375 #else
1376  int offset = 0;
1377 #endif
1378 
1379  ox = ox * (1 << (BIT_DEPTH - 8));
1380  for (y = 0; y < height; y++) {
1381  for (x = 0; x < width; x++) {
1382  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1383  }
1384  dst += dststride;
1385  src += srcstride;
1386  }
1387 }
1388 
1389 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1390  int16_t *src2,
1391  int height, int denom, int wx0, int wx1,
1392  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1393 {
1394  int x, y;
1395  pixel *src = (pixel *)_src;
1396  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1397  pixel *dst = (pixel *)_dst;
1398  ptrdiff_t dststride = _dststride / sizeof(pixel);
1399  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1400  int shift = 14 + 1 - BIT_DEPTH;
1401  int log2Wd = denom + shift - 1;
1402 
1403  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1404  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1405  for (y = 0; y < height; y++) {
1406  for (x = 0; x < width; x++)
1407  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1408  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1409  src += srcstride;
1410  dst += dststride;
1411  src2 += MAX_PB_SIZE;
1412  }
1413 }
1414 
1415 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1416  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1417 {
1418  int x, y;
1419  pixel *src = (pixel *)_src;
1420  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1421  pixel *dst = (pixel *)_dst;
1422  ptrdiff_t dststride = _dststride / sizeof(pixel);
1423  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1424  int shift = denom + 14 - BIT_DEPTH;
1425 #if BIT_DEPTH < 14
1426  int offset = 1 << (shift - 1);
1427 #else
1428  int offset = 0;
1429 #endif
1430 
1431  ox = ox * (1 << (BIT_DEPTH - 8));
1432  for (y = 0; y < height; y++) {
1433  for (x = 0; x < width; x++) {
1434  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1435  }
1436  dst += dststride;
1437  src += srcstride;
1438  }
1439 }
1440 
1441 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1442  int16_t *src2,
1443  int height, int denom, int wx0, int wx1,
1444  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1445 {
1446  int x, y;
1447  pixel *src = (pixel *)_src;
1448  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1449  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1450  pixel *dst = (pixel *)_dst;
1451  ptrdiff_t dststride = _dststride / sizeof(pixel);
1452  int shift = 14 + 1 - BIT_DEPTH;
1453  int log2Wd = denom + shift - 1;
1454 
1455  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1456  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1457  for (y = 0; y < height; y++) {
1458  for (x = 0; x < width; x++)
1459  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1460  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1461  src += srcstride;
1462  dst += dststride;
1463  src2 += MAX_PB_SIZE;
1464  }
1465 }
1466 
1467 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1468  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1469 {
1470  int x, y;
1471  pixel *src = (pixel *)_src;
1472  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1473  pixel *dst = (pixel *)_dst;
1474  ptrdiff_t dststride = _dststride / sizeof(pixel);
1475  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1476  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1477  int16_t *tmp = tmp_array;
1478  int shift = denom + 14 - BIT_DEPTH;
1479 #if BIT_DEPTH < 14
1480  int offset = 1 << (shift - 1);
1481 #else
1482  int offset = 0;
1483 #endif
1484 
1485  src -= EPEL_EXTRA_BEFORE * srcstride;
1486 
1487  for (y = 0; y < height + EPEL_EXTRA; y++) {
1488  for (x = 0; x < width; x++)
1489  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1490  src += srcstride;
1491  tmp += MAX_PB_SIZE;
1492  }
1493 
1494  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1495  filter = ff_hevc_epel_filters[my - 1];
1496 
1497  ox = ox * (1 << (BIT_DEPTH - 8));
1498  for (y = 0; y < height; y++) {
1499  for (x = 0; x < width; x++)
1500  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1501  tmp += MAX_PB_SIZE;
1502  dst += dststride;
1503  }
1504 }
1505 
1506 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1507  int16_t *src2,
1508  int height, int denom, int wx0, int wx1,
1509  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1510 {
1511  int x, y;
1512  pixel *src = (pixel *)_src;
1513  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1514  pixel *dst = (pixel *)_dst;
1515  ptrdiff_t dststride = _dststride / sizeof(pixel);
1516  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1517  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1518  int16_t *tmp = tmp_array;
1519  int shift = 14 + 1 - BIT_DEPTH;
1520  int log2Wd = denom + shift - 1;
1521 
1522  src -= EPEL_EXTRA_BEFORE * srcstride;
1523 
1524  for (y = 0; y < height + EPEL_EXTRA; y++) {
1525  for (x = 0; x < width; x++)
1526  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1527  src += srcstride;
1528  tmp += MAX_PB_SIZE;
1529  }
1530 
1531  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1532  filter = ff_hevc_epel_filters[my - 1];
1533 
1534  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1535  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1536  for (y = 0; y < height; y++) {
1537  for (x = 0; x < width; x++)
1538  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1539  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1540  tmp += MAX_PB_SIZE;
1541  dst += dststride;
1542  src2 += MAX_PB_SIZE;
1543  }
1544 }// line zero
/*
 * Sample accessors for the loop filters below. They expand in terms of the
 * local variables `pix`, `xstride` and `ystride` of the enclosing function:
 * P3..P0 are the four samples at offsets -4..-1 (in units of xstride) from
 * pix, Q0..Q3 the four samples at offsets 0..3.
 */
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-1 * xstride]
#define Q0 pix[0 * xstride]
#define Q1 pix[1 * xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three. used only for deblocking decision
// (same offsets as above, displaced by 3 * ystride from pix)
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-1 * xstride + 3 * ystride]
#define TQ0 pix[0 * xstride + 3 * ystride]
#define TQ1 pix[1 * xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
1563 
1565  ptrdiff_t _xstride, ptrdiff_t _ystride,
1566  int beta, int *_tc,
1567  uint8_t *_no_p, uint8_t *_no_q)
1568 {
1569  int d, j;
1570  pixel *pix = (pixel *)_pix;
1571  ptrdiff_t xstride = _xstride / sizeof(pixel);
1572  ptrdiff_t ystride = _ystride / sizeof(pixel);
1573 
1574  beta <<= BIT_DEPTH - 8;
1575 
1576  for (j = 0; j < 2; j++) {
1577  const int dp0 = abs(P2 - 2 * P1 + P0);
1578  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1579  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1580  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1581  const int d0 = dp0 + dq0;
1582  const int d3 = dp3 + dq3;
1583  const int tc = _tc[j] << (BIT_DEPTH - 8);
1584  const int no_p = _no_p[j];
1585  const int no_q = _no_q[j];
1586 
1587  if (d0 + d3 >= beta) {
1588  pix += 4 * ystride;
1589  continue;
1590  } else {
1591  const int beta_3 = beta >> 3;
1592  const int beta_2 = beta >> 2;
1593  const int tc25 = ((tc * 5 + 1) >> 1);
1594 
1595  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1596  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1597  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1598  // strong filtering
1599  const int tc2 = tc << 1;
1600  for (d = 0; d < 4; d++) {
1601  const int p3 = P3;
1602  const int p2 = P2;
1603  const int p1 = P1;
1604  const int p0 = P0;
1605  const int q0 = Q0;
1606  const int q1 = Q1;
1607  const int q2 = Q2;
1608  const int q3 = Q3;
1609  if (!no_p) {
1610  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1611  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1612  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1613  }
1614  if (!no_q) {
1615  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1616  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1617  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1618  }
1619  pix += ystride;
1620  }
1621  } else { // normal filtering
1622  int nd_p = 1;
1623  int nd_q = 1;
1624  const int tc_2 = tc >> 1;
1625  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1626  nd_p = 2;
1627  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1628  nd_q = 2;
1629 
1630  for (d = 0; d < 4; d++) {
1631  const int p2 = P2;
1632  const int p1 = P1;
1633  const int p0 = P0;
1634  const int q0 = Q0;
1635  const int q1 = Q1;
1636  const int q2 = Q2;
1637  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1638  if (abs(delta0) < 10 * tc) {
1639  delta0 = av_clip(delta0, -tc, tc);
1640  if (!no_p)
1641  P0 = av_clip_pixel(p0 + delta0);
1642  if (!no_q)
1643  Q0 = av_clip_pixel(q0 - delta0);
1644  if (!no_p && nd_p > 1) {
1645  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1646  P1 = av_clip_pixel(p1 + deltap1);
1647  }
1648  if (!no_q && nd_q > 1) {
1649  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1650  Q1 = av_clip_pixel(q1 + deltaq1);
1651  }
1652  }
1653  pix += ystride;
1654  }
1655  }
1656  }
1657  }
1658 }
1659 
1660 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1661  ptrdiff_t _ystride, int *_tc,
1662  uint8_t *_no_p, uint8_t *_no_q)
1663 {
1664  int d, j, no_p, no_q;
1665  pixel *pix = (pixel *)_pix;
1666  ptrdiff_t xstride = _xstride / sizeof(pixel);
1667  ptrdiff_t ystride = _ystride / sizeof(pixel);
1668 
1669  for (j = 0; j < 2; j++) {
1670  const int tc = _tc[j] << (BIT_DEPTH - 8);
1671  if (tc <= 0) {
1672  pix += 4 * ystride;
1673  continue;
1674  }
1675  no_p = _no_p[j];
1676  no_q = _no_q[j];
1677 
1678  for (d = 0; d < 4; d++) {
1679  int delta0;
1680  const int p1 = P1;
1681  const int p0 = P0;
1682  const int q0 = Q0;
1683  const int q1 = Q1;
1684  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1685  if (!no_p)
1686  P0 = av_clip_pixel(p0 + delta0);
1687  if (!no_q)
1688  Q0 = av_clip_pixel(q0 - delta0);
1689  pix += ystride;
1690  }
1691  }
1692 }
1693 
/**
 * Chroma loop filter wrapper: forwards to hevc_loop_filter_chroma with
 * `stride` as the distance between samples across the edge and one pixel
 * (sizeof(pixel) bytes) as the distance between successive filtered lines.
 */
static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
                                            int32_t *tc, uint8_t *no_p,
                                            uint8_t *no_q)
{
    FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
}
1700 
/**
 * Chroma loop filter wrapper: forwards to hevc_loop_filter_chroma with one
 * pixel (sizeof(pixel) bytes) as the distance between samples across the
 * edge and `stride` as the distance between successive filtered lines.
 */
static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
                                            int32_t *tc, uint8_t *no_p,
                                            uint8_t *no_q)
{
    FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
}
1707 
/**
 * Luma loop filter wrapper: forwards to hevc_loop_filter_luma with `stride`
 * as the distance between samples across the edge and one pixel
 * (sizeof(pixel) bytes) as the distance between successive filtered lines.
 */
static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                          int beta, int32_t *tc, uint8_t *no_p,
                                          uint8_t *no_q)
{
    FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
                                beta, tc, no_p, no_q);
}
1715 
/**
 * Luma loop filter wrapper: forwards to hevc_loop_filter_luma with one pixel
 * (sizeof(pixel) bytes) as the distance between samples across the edge and
 * `stride` as the distance between successive filtered lines.
 */
static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                          int beta, int32_t *tc, uint8_t *no_p,
                                          uint8_t *no_q)
{
    FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
                                beta, tc, no_p, no_q);
}
1723 
1724 #undef P3
1725 #undef P2
1726 #undef P1
1727 #undef P0
1728 #undef Q0
1729 #undef Q1
1730 #undef Q2
1731 #undef Q3
1732 
1733 #undef TP3
1734 #undef TP2
1735 #undef TP1
1736 #undef TP0
1737 #undef TQ0
1738 #undef TQ1
1739 #undef TQ2
1740 #undef TQ3