FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "bit_depth_template.c"
25 #include "hevcdsp.h"
26 #include "hevc.h"
27 
/*
 * Assignment policies handed to the TR_* butterfly macros as their `assign`
 * argument.  SCALE and ADD_AND_SCALE pick up the variables `add` and `shift`
 * from whatever scope they are expanded in.
 */

/* Store the value unchanged. */
#define SET(dst, x) \
    (dst) = (x)

/* Add the rounding term, shift right, pass through av_clip_int16() and store. */
#define SCALE(dst, x) \
    (dst) = av_clip_int16(((x) + add) >> shift)

/* Round/shift as in SCALE, add the result onto dst and pass through av_clip_pixel(). */
#define ADD_AND_SCALE(dst, x) \
    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
31 
32 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int size,
33  GetBitContext *gb, int pcm_bit_depth)
34 {
35  int x, y;
36  pixel *dst = (pixel*)_dst;
37  ptrdiff_t stride = _stride / sizeof(pixel);
38 
39  for (y = 0; y < size; y++) {
40  for (x = 0; x < size; x++)
41  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
42  dst += stride;
43  }
44 }
45 
46 static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
47 {
48  int x, y;
49  pixel *dst = (pixel*)_dst;
50  ptrdiff_t stride = _stride / sizeof(pixel);
51 
52  for (y = 0; y < 4; y++) {
53  for (x = 0; x < 4; x++) {
54  dst[x] = av_clip_pixel(dst[x] + *coeffs);
55  coeffs++;
56  }
57  dst += stride;
58  }
59 
60 }
61 
62 static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
63 {
64  int x, y;
65  pixel *dst = (pixel*)_dst;
66  ptrdiff_t stride = _stride / sizeof(pixel);
67 
68  for (y = 0; y < 8; y++) {
69  for (x = 0; x < 8; x++) {
70  dst[x] = av_clip_pixel(dst[x] + *coeffs);
71  coeffs++;
72  }
73  dst += stride;
74  }
75 }
76 
77 static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
78 {
79  int x, y;
80  pixel *dst = (pixel*)_dst;
81  ptrdiff_t stride = _stride / sizeof(pixel);
82 
83  for (y = 0; y < 16; y++) {
84  for (x = 0; x < 16; x++) {
85  dst[x] = av_clip_pixel(dst[x] + *coeffs);
86  coeffs++;
87  }
88  dst += stride;
89  }
90 
91 }
92 
93 static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
94 {
95  int x, y;
96  pixel *dst = (pixel*)_dst;
97  ptrdiff_t stride = _stride / sizeof(pixel);
98 
99  for (y = 0; y < 32; y++) {
100  for (x = 0; x < 32; x++) {
101  dst[x] = av_clip_pixel(dst[x] + *coeffs);
102  coeffs++;
103  }
104  dst += stride;
105  }
106 }
107 
108 static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
109 {
110  pixel *dst = (pixel*)_dst;
111  ptrdiff_t stride = _stride / sizeof(pixel);
112  int shift = 13 - BIT_DEPTH;
113 #if BIT_DEPTH <= 13
114  int offset = 1 << (shift - 1);
115 #else
116  int offset = 0;
117 #endif
118  int x, y;
119 
120  for (y = 0; y < 4*4; y+=4) {
121  for (x = 0; x < 4; x++) {
122  dst[x] = av_clip_pixel(dst[x] + ((coeffs[y + x] + offset) >> shift));
123  }
124  dst += stride;
125  }
126 }
127 
128 static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
129 {
130 #define TR_4x4_LUMA(dst, src, step, assign) \
131  do { \
132  int c0 = src[0*step] + src[2*step]; \
133  int c1 = src[2*step] + src[3*step]; \
134  int c2 = src[0*step] - src[3*step]; \
135  int c3 = 74 * src[1*step]; \
136  \
137  assign(dst[2*step], 74 * (src[0*step] - src[2*step] + src[3*step])); \
138  assign(dst[0*step], 29 * c0 + 55 * c1 + c3); \
139  assign(dst[1*step], 55 * c2 - 29 * c1 + c3); \
140  assign(dst[3*step], 55 * c0 + 29 * c2 - c3); \
141  } while (0)
142 
143  int i;
144  pixel *dst = (pixel*)_dst;
145  ptrdiff_t stride = _stride / sizeof(pixel);
146  int shift = 7;
147  int add = 1 << (shift - 1);
148  int16_t *src = coeffs;
149 
150  for (i = 0; i < 4; i++) {
151  TR_4x4_LUMA(src, src, 4, SCALE);
152  src++;
153  }
154 
155  shift = 20 - BIT_DEPTH;
156  add = 1 << (shift - 1);
157  for (i = 0; i < 4; i++) {
158  TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
159  coeffs += 4;
160  dst += stride;
161  }
162 
163 #undef TR_4x4_LUMA
164 }
165 
/*
 * 4-point butterfly.  Reads src[0..3 * sstep], combines them with entries of
 * the `transform` table (rows 0, 8, 16, 24; columns 0 and 1) and hands the
 * four results to `assign` at dst[0..3 * dstep].  All inputs are read before
 * the first output is written.
 */
#define TR_4(dst, src, dstep, sstep, assign)                       \
    do {                                                           \
        const int ev0 = transform[8 * 0][0] * src[0 * sstep] +     \
                        transform[8 * 2][0] * src[2 * sstep];      \
        const int ev1 = transform[8 * 0][1] * src[0 * sstep] +     \
                        transform[8 * 2][1] * src[2 * sstep];      \
        const int od0 = transform[8 * 1][0] * src[1 * sstep] +     \
                        transform[8 * 3][0] * src[3 * sstep];      \
        const int od1 = transform[8 * 1][1] * src[1 * sstep] +     \
                        transform[8 * 3][1] * src[3 * sstep];      \
                                                                   \
        assign(dst[0 * dstep], ev0 + od0);                         \
        assign(dst[1 * dstep], ev1 + od1);                         \
        assign(dst[2 * dstep], ev1 - od1);                         \
        assign(dst[3 * dstep], ev0 - od0);                         \
    } while (0)

/* First pass: elements spaced 4 apart, result stored through SCALE. */
#define TR_4_1(dst, src) TR_4(dst, src, 4, 4, SCALE)
/* Second pass: consecutive elements, result accumulated through ADD_AND_SCALE. */
#define TR_4_2(dst, src) TR_4(dst, src, 1, 1, ADD_AND_SCALE)
184 
185 static void FUNC(transform_4x4_add)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
186 {
187  int i;
188  pixel *dst = (pixel*)_dst;
189  ptrdiff_t stride = _stride / sizeof(pixel);
190  int shift = 7;
191  int add = 1 << (shift - 1);
192  int16_t *src = coeffs;
193 
194  for (i = 0; i < 4; i++) {
195  TR_4_1(src, src);
196  src++;
197  }
198 
199  shift = 20 - BIT_DEPTH;
200  add = 1 << (shift - 1);
201  for (i = 0; i < 4; i++) {
202  TR_4_2(dst, coeffs);
203  coeffs += 4;
204  dst += stride;
205  }
206 }
207 
/*
 * 8-, 16- and 32-point butterflies.  Each level sums the odd-indexed inputs
 * against the `transform` table, obtains the even half from the next smaller
 * butterfly (stored unscaled via SET) and then emits the sum to the low
 * output index and the difference to the mirrored high index.  All inputs
 * are consumed before the first `assign`, so dst may be the same array as src.
 *
 * The scratch arrays carry a distinct name per level because the macros nest
 * and the outer array is passed by name into the inner expansion.
 */
#define TR_8(dst, src, dstep, sstep, assign)                          \
    do {                                                              \
        int i, j;                                                     \
        int even_8[4];                                                \
        int odd_8[4] = { 0 };                                         \
        for (i = 0; i < 4; i++)                                       \
            for (j = 1; j < 8; j += 2)                                \
                odd_8[i] += transform[4 * j][i] * src[j * sstep];     \
        TR_4(even_8, src, 1, 2 * sstep, SET);                         \
                                                                      \
        for (i = 0; i < 4; i++) {                                     \
            assign(dst[i * dstep], even_8[i] + odd_8[i]);             \
            assign(dst[(7 - i) * dstep], even_8[i] - odd_8[i]);       \
        }                                                             \
    } while (0)

#define TR_16(dst, src, dstep, sstep, assign)                         \
    do {                                                              \
        int i, j;                                                     \
        int even_16[8];                                               \
        int odd_16[8] = { 0 };                                        \
        for (i = 0; i < 8; i++)                                       \
            for (j = 1; j < 16; j += 2)                               \
                odd_16[i] += transform[2 * j][i] * src[j * sstep];    \
        TR_8(even_16, src, 1, 2 * sstep, SET);                        \
                                                                      \
        for (i = 0; i < 8; i++) {                                     \
            assign(dst[i * dstep], even_16[i] + odd_16[i]);           \
            assign(dst[(15 - i) * dstep], even_16[i] - odd_16[i]);    \
        }                                                             \
    } while (0)

#define TR_32(dst, src, dstep, sstep, assign)                         \
    do {                                                              \
        int i, j;                                                     \
        int even_32[16];                                              \
        int odd_32[16] = { 0 };                                       \
        for (i = 0; i < 16; i++)                                      \
            for (j = 1; j < 32; j += 2)                               \
                odd_32[i] += transform[j][i] * src[j * sstep];        \
        TR_16(even_32, src, 1, 2 * sstep, SET);                       \
                                                                      \
        for (i = 0; i < 16; i++) {                                    \
            assign(dst[i * dstep], even_32[i] + odd_32[i]);           \
            assign(dst[(31 - i) * dstep], even_32[i] - odd_32[i]);    \
        }                                                             \
    } while (0)

/* First pass: elements spaced one row apart, stored through SCALE. */
#define TR_8_1(dst, src)  TR_8(dst, src, 8, 8, SCALE)
#define TR_16_1(dst, src) TR_16(dst, src, 16, 16, SCALE)
#define TR_32_1(dst, src) TR_32(dst, src, 32, 32, SCALE)

/* Second pass: consecutive elements, accumulated through ADD_AND_SCALE. */
#define TR_8_2(dst, src)  TR_8(dst, src, 1, 1, ADD_AND_SCALE)
#define TR_16_2(dst, src) TR_16(dst, src, 1, 1, ADD_AND_SCALE)
#define TR_32_2(dst, src) TR_32(dst, src, 1, 1, ADD_AND_SCALE)
261 
262 static void FUNC(transform_8x8_add)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
263 {
264  int i;
265  pixel *dst = (pixel*)_dst;
266  ptrdiff_t stride = _stride / sizeof(pixel);
267  int shift = 7;
268  int add = 1 << (shift - 1);
269  int16_t *src = coeffs;
270 
271  for (i = 0; i < 8; i++) {
272  TR_8_1(src, src);
273  src++;
274  }
275 
276  shift = 20 - BIT_DEPTH;
277  add = 1 << (shift - 1);
278  for (i = 0; i < 8; i++) {
279  TR_8_2(dst, coeffs);
280  coeffs += 8;
281  dst += stride;
282  }
283 }
284 
285 static void FUNC(transform_16x16_add)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
286 {
287  int i;
288  pixel *dst = (pixel*)_dst;
289  ptrdiff_t stride = _stride / sizeof(pixel);
290  int shift = 7;
291  int add = 1 << (shift - 1);
292  int16_t *src = coeffs;
293 
294  for (i = 0; i < 16; i++) {
295  TR_16_1(src, src);
296  src++;
297  }
298 
299  shift = 20 - BIT_DEPTH;
300  add = 1 << (shift - 1);
301  for (i = 0; i < 16; i++) {
302  TR_16_2(dst, coeffs);
303  coeffs += 16;
304  dst += stride;
305  }
306 }
307 
308 static void FUNC(transform_32x32_add)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride)
309 {
310 #define IT32x32_even(i,w) ( src[ 0*w] * transform[ 0][i] ) + ( src[16*w] * transform[16][i] )
311 #define IT32x32_odd(i,w) ( src[ 8*w] * transform[ 8][i] ) + ( src[24*w] * transform[24][i] )
312 #define IT16x16(i,w) ( src[ 4*w] * transform[ 4][i] ) + ( src[12*w] * transform[12][i] ) + ( src[20*w] * transform[20][i] ) + ( src[28*w] * transform[28][i] )
313 #define IT8x8(i,w) ( src[ 2*w] * transform[ 2][i] ) + ( src[ 6*w] * transform[ 6][i] ) + ( src[10*w] * transform[10][i] ) + ( src[14*w] * transform[14][i] ) + \
314  ( src[18*w] * transform[18][i] ) + ( src[22*w] * transform[22][i] ) + ( src[26*w] * transform[26][i] ) + ( src[30*w] * transform[30][i] )
315 #define IT4x4(i,w) ( src[ 1*w] * transform[ 1][i] ) + ( src[ 3*w] * transform[ 3][i] ) + ( src[ 5*w] * transform[ 5][i] ) + ( src[ 7*w] * transform[ 7][i] ) + \
316  ( src[ 9*w] * transform[ 9][i] ) + ( src[11*w] * transform[11][i] ) + ( src[13*w] * transform[13][i] ) + ( src[15*w] * transform[15][i] ) + \
317  ( src[17*w] * transform[17][i] ) + ( src[19*w] * transform[19][i] ) + ( src[21*w] * transform[21][i] ) + ( src[23*w] * transform[23][i] ) + \
318  ( src[25*w] * transform[25][i] ) + ( src[27*w] * transform[27][i] ) + ( src[29*w] * transform[29][i] ) + ( src[31*w] * transform[31][i] )
319  int i;
320  pixel *dst = (pixel*)_dst;
321  ptrdiff_t stride = _stride / sizeof(pixel);
322  int shift = 7;
323  int add = 1 << (shift - 1);
324  int16_t *src = coeffs;
325 
326  for (i = 0; i < 32; i++) {
327  TR_32_1(src, src);
328  src++;
329  }
330  src = coeffs;
331  shift = 20 - BIT_DEPTH;
332  add = 1 << (shift - 1);
333  for (i = 0; i < 32; i++) {
334  TR_32_2(dst, coeffs);
335  coeffs += 32;
336  dst += stride;
337  }
338 #undef IT32x32_even
339 #undef IT32x32_odd
340 #undef IT16x16
341 #undef IT8x8
342 #undef IT4x4
343 }
344 
345 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
346  ptrdiff_t _stride, SAOParams *sao,
347  int *borders, int width, int height,
348  int c_idx, int class)
349 {
350  pixel *dst = (pixel*)_dst;
351  pixel *src = (pixel*)_src;
352  ptrdiff_t stride = _stride / sizeof(pixel);
353  int offset_table[32] = { 0 };
354  int k, y, x;
355  int chroma = !!c_idx;
356  int shift = BIT_DEPTH - 5;
357  int *sao_offset_val = sao->offset_val[c_idx];
358  int sao_left_class = sao->band_position[c_idx];
359  int init_y = 0, init_x = 0;
360 
361  switch (class) {
362  case 0:
363  if (!borders[2])
364  width -= ((8 >> chroma) + 2);
365  if (!borders[3])
366  height -= ((4 >> chroma) + 2);
367  break;
368  case 1:
369  init_y = -(4 >> chroma) - 2;
370  if (!borders[2])
371  width -= ((8 >> chroma) + 2);
372  height = (4 >> chroma) + 2;
373  break;
374  case 2:
375  init_x = -(8 >> chroma) - 2;
376  width = (8 >> chroma) + 2;
377  if (!borders[3])
378  height -= ((4 >> chroma) + 2);
379  break;
380  case 3:
381  init_y = -(4 >> chroma) - 2;
382  init_x = -(8 >> chroma) - 2;
383  width = (8 >> chroma) + 2;
384  height = (4 >> chroma) + 2;
385  break;
386  }
387 
388  dst = dst + (init_y * stride + init_x);
389  src = src + (init_y * stride + init_x);
390  for (k = 0; k < 4; k++)
391  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
392  for (y = 0; y < height; y++) {
393  for (x = 0; x < width; x++)
394  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
395  dst += stride;
396  src += stride;
397  }
398 }
399 
401  ptrdiff_t stride, SAOParams *sao,
402  int *borders, int width, int height,
403  int c_idx)
404 {
405  FUNC(sao_band_filter)(dst, src, stride, sao, borders, width, height, c_idx, 0);
406 }
407 
409  ptrdiff_t stride, SAOParams *sao,
410  int *borders, int width, int height,
411  int c_idx)
412 {
413  FUNC(sao_band_filter)(dst, src, stride, sao, borders, width, height, c_idx, 1);
414 }
415 
417  ptrdiff_t stride, SAOParams *sao,
418  int *borders, int width, int height,
419  int c_idx)
420 {
421  FUNC(sao_band_filter)(dst, src, stride, sao, borders, width, height, c_idx, 2);
422 }
423 
424 static void FUNC(sao_band_filter_3)(uint8_t *_dst, uint8_t *_src,
425  ptrdiff_t _stride, SAOParams *sao,
426  int *borders, int width, int height,
427  int c_idx)
428 {
429  FUNC(sao_band_filter)(_dst, _src, _stride, sao, borders, width, height, c_idx, 3);
430 }
431 
432 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
433  ptrdiff_t _stride, SAOParams *sao,
434  int *borders, int _width, int _height,
435  int c_idx,
436  uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
437 {
438  int x, y;
439  pixel *dst = (pixel*)_dst;
440  pixel *src = (pixel*)_src;
441  ptrdiff_t stride = _stride / sizeof(pixel);
442  int chroma = !!c_idx;
443  int *sao_offset_val = sao->offset_val[c_idx];
444  int sao_eo_class = sao->eo_class[c_idx];
445 
446  static const int8_t pos[4][2][2] = {
447  {{ -1, 0}, { 1, 0}}, // horizontal
448  {{ 0, -1}, { 0, 1}}, // vertical
449  {{ -1, -1}, { 1, 1}}, // 45 degree
450  {{ 1, -1}, {-1, 1}}, // 135 degree
451  };
452  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
453 
454  int init_x = 0, init_y = 0, width = _width, height = _height;
455 
456 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
457 
458  if (!borders[2])
459  width -= (8 >> chroma) + 2;
460  if (!borders[3])
461  height -= (4 >> chroma) + 2;
462 
463  dst = dst + (init_y * stride + init_x);
464  src = src + (init_y * stride + init_x);
465  init_y = init_x = 0;
466  if (sao_eo_class != SAO_EO_VERT) {
467  if (borders[0]) {
468  int offset_val = sao_offset_val[0];
469  int y_stride = 0;
470  for (y = 0; y < height; y++) {
471  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
472  y_stride += stride;
473  }
474  init_x = 1;
475  }
476  if (borders[2]) {
477  int offset_val = sao_offset_val[0];
478  int x_stride = width - 1;
479  for (x = 0; x < height; x++) {
480  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
481  x_stride += stride;
482  }
483  width --;
484  }
485 
486  }
487  if (sao_eo_class != SAO_EO_HORIZ ) {
488  if (borders[1]){
489  int offset_val = sao_offset_val[0];
490  for (x = init_x; x < width; x++) {
491  dst[x] = av_clip_pixel(src[x] + offset_val);
492  }
493  init_y = 1;
494  }
495  if (borders[3]){
496  int offset_val = sao_offset_val[0];
497  int y_stride = stride * (height - 1);
498  for (x = init_x; x < width; x++) {
499  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
500  }
501  height--;
502  }
503  }
504  {
505  int y_stride = init_y * stride;
506  int pos_0_0 = pos[sao_eo_class][0][0];
507  int pos_0_1 = pos[sao_eo_class][0][1];
508  int pos_1_0 = pos[sao_eo_class][1][0];
509  int pos_1_1 = pos[sao_eo_class][1][1];
510 
511  int y_stride_0_1 = (init_y + pos_0_1) * stride;
512  int y_stride_1_1 = (init_y + pos_1_1) * stride;
513  for (y = init_y; y < height; y++) {
514  for (x = init_x; x < width; x++) {
515  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
516  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
517  int offset_val = edge_idx[2 + diff0 + diff1];
518  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
519  }
520  y_stride += stride;
521  y_stride_0_1 += stride;
522  y_stride_1_1 += stride;
523  }
524  }
525 
526  {
527  // Restore pixels that can't be modified
528  int save_upper_left = !diag_edge && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
529  if (vert_edge && sao_eo_class != SAO_EO_VERT)
530  for (y = init_y+save_upper_left; y< height; y++)
531  dst[y*stride] = src[y*stride];
532  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
533  for(x = init_x+save_upper_left; x<width; x++)
534  dst[x] = src[x];
535  if(diag_edge && sao_eo_class == SAO_EO_135D)
536  dst[0] = src[0];
537  }
538 
539 #undef CMP
540 }
541 
542 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
543  ptrdiff_t _stride, SAOParams *sao,
544  int *borders, int _width, int _height,
545  int c_idx,
546  uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
547 {
548  int x, y;
549  pixel *dst = (pixel*)_dst;
550  pixel *src = (pixel*)_src;
551  ptrdiff_t stride = _stride / sizeof(pixel);
552  int chroma = !!c_idx;
553  int *sao_offset_val = sao->offset_val[c_idx];
554  int sao_eo_class = sao->eo_class[c_idx];
555 
556  static const int8_t pos[4][2][2] = {
557  {{ -1, 0}, { 1, 0 }}, // horizontal
558  {{ 0, -1}, { 0, 1 }}, // vertical
559  {{ -1, -1}, { 1, 1 }}, // 45 degree
560  {{ 1, -1}, {-1, 1 }}, // 135 degree
561  };
562  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
563 
564  int init_x = 0, init_y = 0, width = _width, height = _height;
565 
566 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
567 
568  init_y = -(4 >> chroma) - 2;
569  if (!borders[2])
570  width -= (8 >> chroma) + 2;
571  height = (4 >> chroma) + 2;
572 
573  dst = dst + (init_y * stride + init_x);
574  src = src + (init_y * stride + init_x);
575  init_y = init_x = 0;
576  if (sao_eo_class != SAO_EO_VERT) {
577  if (borders[0]) {
578  int offset_val = sao_offset_val[0];
579  int y_stride = 0;
580  for (y = 0; y < height; y++) {
581  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
582  y_stride += stride;
583  }
584  init_x = 1;
585  }
586  if (borders[2]) {
587  int offset_val = sao_offset_val[0];
588  int x_stride = width - 1;
589  for (x = 0; x < height; x++) {
590  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
591  x_stride += stride;
592  }
593  width--;
594  }
595 
596  }
597  {
598  int y_stride = init_y * stride;
599  int pos_0_0 = pos[sao_eo_class][0][0];
600  int pos_0_1 = pos[sao_eo_class][0][1];
601  int pos_1_0 = pos[sao_eo_class][1][0];
602  int pos_1_1 = pos[sao_eo_class][1][1];
603 
604  int y_stride_0_1 = (init_y + pos_0_1) * stride;
605  int y_stride_1_1 = (init_y + pos_1_1) * stride;
606  for (y = init_y; y < height; y++) {
607  for (x = init_x; x < width; x++) {
608  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
609  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
610  int offset_val = edge_idx[2 + diff0 + diff1];
611  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
612  }
613  y_stride += stride;
614  y_stride_0_1 += stride;
615  y_stride_1_1 += stride;
616  }
617  }
618 
619  {
620  // Restore pixels that can't be modified
621  int save_lower_left = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[0];
622  if(vert_edge && sao_eo_class != SAO_EO_VERT)
623  for(y = init_y; y< height-save_lower_left; y++)
624  dst[y*stride] = src[y*stride];
625  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
626  for(x = init_x+save_lower_left; x<width; x++)
627  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
628  if(diag_edge && sao_eo_class == SAO_EO_45D)
629  dst[stride*(height-1)] = src[stride*(height-1)];
630  }
631 
632 #undef CMP
633 }
634 
635 static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src,
636  ptrdiff_t _stride, SAOParams *sao,
637  int *borders, int _width, int _height,
638  int c_idx,
639  uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
640 {
641  int x, y;
642  pixel *dst = (pixel*)_dst;
643  pixel *src = (pixel*)_src;
644  ptrdiff_t stride = _stride / sizeof(pixel);
645  int chroma = !!c_idx;
646  int *sao_offset_val = sao->offset_val[c_idx];
647  int sao_eo_class = sao->eo_class[c_idx];
648 
649  static const int8_t pos[4][2][2] = {
650  {{ -1, 0}, { 1, 0}}, // horizontal
651  {{ 0, -1}, { 0, 1}}, // vertical
652  {{ -1, -1}, { 1, 1}}, // 45 degree
653  {{ 1, -1}, {-1, 1}}, // 135 degree
654  };
655  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
656 
657  int init_x = 0, init_y = 0, width = _width, height = _height;
658 
659 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
660 
661  init_x = -(8 >> chroma) - 2;
662  width = (8 >> chroma) + 2;
663  if (!borders[3])
664  height -= (4 >> chroma) + 2;
665 
666  dst = dst + (init_y * stride + init_x);
667  src = src + (init_y * stride + init_x);
668  init_y = init_x = 0;
669  if (sao_eo_class != SAO_EO_HORIZ) {
670  if (borders[1]){
671  int offset_val = sao_offset_val[0];
672  for (x = init_x; x < width; x++) {
673  dst[x] = av_clip_pixel(src[x] + offset_val);
674  }
675  init_y = 1;
676  }
677  if (borders[3]){
678  int offset_val = sao_offset_val[0];
679  int y_stride = stride * (height - 1);
680  for (x = init_x; x < width; x++) {
681  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
682  }
683  height--;
684  }
685  }
686  {
687  int y_stride = init_y * stride;
688  int pos_0_0 = pos[sao_eo_class][0][0];
689  int pos_0_1 = pos[sao_eo_class][0][1];
690  int pos_1_0 = pos[sao_eo_class][1][0];
691  int pos_1_1 = pos[sao_eo_class][1][1];
692 
693  int y_stride_0_1 = (init_y + pos_0_1) * stride;
694  int y_stride_1_1 = (init_y + pos_1_1) * stride;
695  for (y = init_y; y < height; y++) {
696  for (x = init_x; x < width; x++) {
697  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
698  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
699  int offset_val = edge_idx[2 + diff0 + diff1];
700  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
701  }
702  y_stride += stride;
703  y_stride_0_1 += stride;
704  y_stride_1_1 += stride;
705  }
706  }
707 
708  {
709  // Restore pixels that can't be modified
710  int save_upper_right = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[1];
711  if(vert_edge && sao_eo_class != SAO_EO_VERT)
712  for(y = init_y+save_upper_right; y< height; y++)
713  dst[y*stride+width-1] = src[y*stride+width-1];
714  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
715  for(x = init_x; x<width-save_upper_right; x++)
716  dst[x] = src[x];
717  if(diag_edge && sao_eo_class == SAO_EO_45D)
718  dst[width-1] = src[width-1];
719  }
720 #undef CMP
721 }
722 
723 static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
724  ptrdiff_t _stride, SAOParams *sao,
725  int *borders, int _width, int _height,
726  int c_idx,
727  uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge)
728 {
729  int x, y;
730  pixel *dst = (pixel*)_dst;
731  pixel *src = (pixel*)_src;
732  ptrdiff_t stride = _stride / sizeof(pixel);
733  int chroma = !!c_idx;
734  int *sao_offset_val = sao->offset_val[c_idx];
735  int sao_eo_class = sao->eo_class[c_idx];
736 
737  static const int8_t pos[4][2][2] = {
738  {{ -1, 0}, { 1, 0}}, // horizontal
739  {{ 0, -1}, { 0, 1}}, // vertical
740  {{ -1, -1}, { 1, 1}}, // 45 degree
741  {{ 1, -1}, {-1, 1}}, // 135 degree
742  };
743  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
744 
745  int init_x = 0, init_y = 0, width = _width, height = _height;
746 
747 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
748 
749  init_y = -(4 >> chroma) - 2;
750  init_x = -(8 >> chroma) - 2;
751  width = (8 >> chroma) + 2;
752  height = (4 >> chroma) + 2;
753 
754 
755  dst = dst + (init_y * stride + init_x);
756  src = src + (init_y * stride + init_x);
757  init_y = init_x = 0;
758 
759  {
760  int y_stride = init_y * stride;
761  int pos_0_0 = pos[sao_eo_class][0][0];
762  int pos_0_1 = pos[sao_eo_class][0][1];
763  int pos_1_0 = pos[sao_eo_class][1][0];
764  int pos_1_1 = pos[sao_eo_class][1][1];
765 
766  int y_stride_0_1 = (init_y + pos_0_1) * stride;
767  int y_stride_1_1 = (init_y + pos_1_1) * stride;
768 
769  for (y = init_y; y < height; y++) {
770  for (x = init_x; x < width; x++) {
771  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
772  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
773  int offset_val = edge_idx[2 + diff0 + diff1];
774  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
775  }
776  y_stride += stride;
777  y_stride_0_1 += stride;
778  y_stride_1_1 += stride;
779  }
780  }
781 
782  {
783  // Restore pixels that can't be modified
784  int save_lower_right = !diag_edge && sao_eo_class == SAO_EO_135D;
785  if(vert_edge && sao_eo_class != SAO_EO_VERT)
786  for(y = init_y; y< height-save_lower_right; y++)
787  dst[y*stride+width-1] = src[y*stride+width-1];
788  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
789  for(x = init_x; x<width-save_lower_right; x++)
790  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
791  if(diag_edge && sao_eo_class == SAO_EO_135D)
792  dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
793  }
794 #undef CMP
795 }
796 
/* The transform helpers are not needed past this point; drop them. */

/* assignment policies */
#undef SET
#undef SCALE
#undef ADD_AND_SCALE

/* butterflies and their first/second-pass wrappers */
#undef TR_4
#undef TR_4_1
#undef TR_4_2
#undef TR_8
#undef TR_8_1
#undef TR_8_2
#undef TR_16
#undef TR_16_1
#undef TR_16_2
#undef TR_32
#undef TR_32_1
#undef TR_32_2
812 
813 static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
814  uint8_t *_src, ptrdiff_t _srcstride,
815  int width, int height, int16_t* mcbuffer)
816 {
817  int x, y;
818  pixel *src = (pixel*)_src;
819  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
820 
821  for (y = 0; y < height; y++) {
822  for (x = 0; x < width; x++)
823  dst[x] = src[x] << (14 - BIT_DEPTH);
824  src += srcstride;
825  dst += dststride;
826  }
827 }
828 
/*
 * Weighted sums of the samples around src[x]; `x` is taken from the scope
 * the macro is expanded in and `stride` is the distance between taps.
 */

/* Taps x-3 .. x+3, weights -1 4 -10 58 17 -5 1. */
#define QPEL_FILTER_1(src, stride)                                              \
    (-src[x-3*stride] + 4*src[x-2*stride] - 10*src[x-stride] + 58*src[x] +      \
     17*src[x+stride] - 5*src[x+2*stride] + 1*src[x+3*stride])

/* Taps x-3 .. x+4, weights -1 4 -11 40 40 -11 4 -1. */
#define QPEL_FILTER_2(src, stride)                                              \
    (-src[x-3*stride] + 4*src[x-2*stride] - 11*src[x-stride] + 40*src[x] +      \
     40*src[x+stride] - 11*src[x+2*stride] + 4*src[x+3*stride] - src[x+4*stride])

/* Taps x-2 .. x+4, weights 1 -5 17 58 -10 4 -1. */
#define QPEL_FILTER_3(src, stride)                                              \
    (src[x-2*stride] - 5*src[x-stride] + 17*src[x] + 58*src[x+stride]           \
     - 10*src[x+2*stride] + 4*src[x+3*stride] - src[x+4*stride])
838 
839 
/*
 * Generates put_hevc_qpel_h<H>: applies QPEL_FILTER_<H> along each row
 * (tap distance 1) and stores the sum shifted right by (BIT_DEPTH - 8).
 * The column index must be called `x` because the filter macros read it.
 */
#define PUT_HEVC_QPEL_H(H)                                                     \
static void FUNC(put_hevc_qpel_h ## H)(int16_t *dst, ptrdiff_t dststride,      \
                                       uint8_t *_src, ptrdiff_t _srcstride,    \
                                       int width, int height,                  \
                                       int16_t* mcbuffer)                      \
{                                                                              \
    pixel *in                = (pixel *)_src;                                  \
    const ptrdiff_t in_pitch = _srcstride / sizeof(pixel);                     \
    int x, y;                                                                  \
                                                                               \
    for (y = 0; y < height; y++, in += in_pitch, dst += dststride)             \
        for (x = 0; x < width; x++)                                            \
            dst[x] = QPEL_FILTER_ ## H (in, 1) >> (BIT_DEPTH - 8);             \
}
857 
/*
 * Generates put_hevc_qpel_v<V>: applies QPEL_FILTER_<V> down each column
 * (tap distance = source row pitch) and stores the sum shifted right by
 * (BIT_DEPTH - 8).  The column index must be called `x` because the filter
 * macros read it.
 */
#define PUT_HEVC_QPEL_V(V)                                                     \
static void FUNC(put_hevc_qpel_v ## V)(int16_t *dst, ptrdiff_t dststride,      \
                                       uint8_t *_src, ptrdiff_t _srcstride,    \
                                       int width, int height,                  \
                                       int16_t* mcbuffer)                      \
{                                                                              \
    pixel *in                = (pixel *)_src;                                  \
    const ptrdiff_t in_pitch = _srcstride / sizeof(pixel);                     \
    int x, y;                                                                  \
                                                                               \
    for (y = 0; y < height; y++, in += in_pitch, dst += dststride)             \
        for (x = 0; x < width; x++)                                            \
            dst[x] = QPEL_FILTER_ ## V (in, in_pitch) >> (BIT_DEPTH - 8);      \
}
875 
/*
 * Generates put_hevc_qpel_h<H>v<V>: a horizontal QPEL_FILTER_<H> pass into a
 * local scratch buffer (row pitch MAX_PB_SIZE) followed by a vertical
 * QPEL_FILTER_<V> pass over that buffer.  The horizontal pass starts
 * ff_hevc_qpel_extra_before[V] rows early and produces
 * ff_hevc_qpel_extra[V] additional rows so that the vertical taps have data.
 * The column index must be called `x` because the filter macros read it.
 */
#define PUT_HEVC_QPEL_HV(H, V)                                                        \
static void FUNC(put_hevc_qpel_h ## H ## v ## V)(int16_t *dst, ptrdiff_t dststride,   \
                                                 uint8_t *_src, ptrdiff_t _srcstride, \
                                                 int width, int height,               \
                                                 int16_t* mcbuffer)                   \
{                                                                                     \
    int16_t scratch[(MAX_PB_SIZE + 7)*MAX_PB_SIZE];                                   \
    int16_t *mid             = scratch;                                               \
    pixel *in                = (pixel *)_src;                                         \
    const ptrdiff_t in_pitch = _srcstride / sizeof(pixel);                            \
    int x, y;                                                                         \
                                                                                      \
    in -= ff_hevc_qpel_extra_before[V] * in_pitch;                                    \
                                                                                      \
    for (y = 0; y < height + ff_hevc_qpel_extra[V];                                   \
         y++, in += in_pitch, mid += MAX_PB_SIZE)                                     \
        for (x = 0; x < width; x++)                                                   \
            mid[x] = QPEL_FILTER_ ## H (in, 1) >> (BIT_DEPTH - 8);                    \
                                                                                      \
    mid = scratch + ff_hevc_qpel_extra_before[V] * MAX_PB_SIZE;                       \
                                                                                      \
    for (y = 0; y < height; y++, mid += MAX_PB_SIZE, dst += dststride)                \
        for (x = 0; x < width; x++)                                                   \
            dst[x] = QPEL_FILTER_ ## V (mid, MAX_PB_SIZE) >> 6;                       \
}
907 
914 PUT_HEVC_QPEL_HV(1, 1)
915 PUT_HEVC_QPEL_HV(1, 2)
916 PUT_HEVC_QPEL_HV(1, 3)
917 PUT_HEVC_QPEL_HV(2, 1)
918 PUT_HEVC_QPEL_HV(2, 2)
919 PUT_HEVC_QPEL_HV(2, 3)
920 PUT_HEVC_QPEL_HV(3, 1)
921 PUT_HEVC_QPEL_HV(3, 2)
922 PUT_HEVC_QPEL_HV(3, 3)
923 
924 static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
925  uint8_t *_src, ptrdiff_t _srcstride,
926  int width, int height, int mx, int my,
927  int16_t* mcbuffer)
928 {
929  int x, y;
930  pixel *src = (pixel*)_src;
931  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
932 
933  for (y = 0; y < height; y++) {
934  for (x = 0; x < width; x++) {
935  dst[x] = src[x] << (14 - BIT_DEPTH);
936  }
937  src += srcstride;
938  dst += dststride;
939  }
940 }
941 
/*
 * Four-tap weighted sum over src[x - stride] .. src[x + 2 * stride].
 * `x` and the weights filter_0 .. filter_3 are taken from the scope the
 * macro is expanded in.
 */
#define EPEL_FILTER(src, stride)                        \
    (filter_0*src[x-stride] + filter_1*src[x] +         \
     filter_2*src[x+stride] + filter_3*src[x+2*stride])
944 
945 static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
946  uint8_t *_src, ptrdiff_t _srcstride,
947  int width, int height, int mx, int my,
948  int16_t* mcbuffer)
949 {
950  int x, y;
951  pixel *src = (pixel*)_src;
952  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
953  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
954  int8_t filter_0 = filter[0];
955  int8_t filter_1 = filter[1];
956  int8_t filter_2 = filter[2];
957  int8_t filter_3 = filter[3];
958  for (y = 0; y < height; y++) {
959  for (x = 0; x < width; x++) {
960  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
961  }
962  src += srcstride;
963  dst += dststride;
964  }
965 }
966 
967 static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
968  uint8_t *_src, ptrdiff_t _srcstride,
969  int width, int height, int mx, int my,
970  int16_t* mcbuffer)
971 {
972  int x, y;
973  pixel *src = (pixel*)_src;
974  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
975  const int8_t *filter = ff_hevc_epel_filters[my-1];
976  int8_t filter_0 = filter[0];
977  int8_t filter_1 = filter[1];
978  int8_t filter_2 = filter[2];
979  int8_t filter_3 = filter[3];
980 
981  for (y = 0; y < height; y++) {
982  for (x = 0; x < width; x++) {
983  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
984  }
985  src += srcstride;
986  dst += dststride;
987  }
988 }
989 
990 static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
991  uint8_t *_src, ptrdiff_t _srcstride,
992  int width, int height, int mx, int my,
993  int16_t* mcbuffer)
994 {
995  int x, y;
996  pixel *src = (pixel*)_src;
997  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
998  const int8_t *filter_h = ff_hevc_epel_filters[mx-1];
999  const int8_t *filter_v = ff_hevc_epel_filters[my-1];
1000  int8_t filter_0 = filter_h[0];
1001  int8_t filter_1 = filter_h[1];
1002  int8_t filter_2 = filter_h[2];
1003  int8_t filter_3 = filter_h[3];
1004  int16_t tmp_array[(MAX_PB_SIZE + 3)*MAX_PB_SIZE];
1005  int16_t *tmp = tmp_array;
1006 
1007  src -= EPEL_EXTRA_BEFORE * srcstride;
1008 
1009  for (y = 0; y < height + EPEL_EXTRA; y++) {
1010  for (x = 0; x < width; x++) {
1011  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1012  }
1013  src += srcstride;
1014  tmp += MAX_PB_SIZE;
1015  }
1016 
1017  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1018  filter_0 = filter_v[0];
1019  filter_1 = filter_v[1];
1020  filter_2 = filter_v[2];
1021  filter_3 = filter_v[3];
1022  for (y = 0; y < height; y++) {
1023  for (x = 0; x < width; x++) {
1024  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1025  }
1026  tmp += MAX_PB_SIZE;
1027  dst += dststride;
1028  }
1029 }
1030 
1031 static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
1032  int16_t *src, ptrdiff_t srcstride,
1033  int width, int height)
1034 {
1035  int x, y;
1036  pixel *dst = (pixel*)_dst;
1037  ptrdiff_t dststride = _dststride / sizeof(pixel);
1038 
1039  int shift = 14 - BIT_DEPTH;
1040 #if BIT_DEPTH < 14
1041  int offset = 1 << (shift - 1);
1042 #else
1043  int offset = 0;
1044 #endif
1045  for (y = 0; y < height; y++) {
1046  for (x = 0; x < width; x++) {
1047  dst[x] = av_clip_pixel((src[x] + offset) >> shift);
1048  }
1049  dst += dststride;
1050  src += srcstride;
1051  }
1052 }
1053 
1054 static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
1055  int16_t *src1, int16_t *src2,
1056  ptrdiff_t srcstride,
1057  int width, int height)
1058 {
1059  int x, y;
1060  pixel *dst = (pixel*)_dst;
1061  ptrdiff_t dststride = _dststride / sizeof(pixel);
1062 
1063  int shift = 14 + 1 - BIT_DEPTH;
1064 #if BIT_DEPTH < 14
1065  int offset = 1 << (shift - 1);
1066 #else
1067  int offset = 0;
1068 #endif
1069 
1070  for (y = 0; y < height; y++) {
1071  for (x = 0; x < width; x++) {
1072  dst[x] = av_clip_pixel((src1[x] + src2[x] + offset) >> shift);
1073  }
1074  dst += dststride;
1075  src1 += srcstride;
1076  src2 += srcstride;
1077  }
1078 }
1079 
1080 static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
1081  uint8_t *_dst, ptrdiff_t _dststride,
1082  int16_t *src, ptrdiff_t srcstride,
1083  int width, int height)
1084 {
1085  int shift;
1086  int log2Wd;
1087  int wx;
1088  int ox;
1089  int x , y;
1090  int offset;
1091  pixel *dst = (pixel*)_dst;
1092  ptrdiff_t dststride = _dststride / sizeof(pixel);
1093 
1094  shift = 14 - BIT_DEPTH;
1095  log2Wd = denom + shift;
1096  offset = 1 << (log2Wd - 1);
1097  wx = wlxFlag;
1098  ox = olxFlag * (1 << (BIT_DEPTH - 8));
1099 
1100  for (y = 0; y < height; y++) {
1101  for (x = 0; x < width; x++) {
1102  if (log2Wd >= 1) {
1103  dst[x] = av_clip_pixel(((src[x] * wx + offset) >> log2Wd) + ox);
1104  } else {
1105  dst[x] = av_clip_pixel(src[x] * wx + ox);
1106  }
1107  }
1108  dst += dststride;
1109  src += srcstride;
1110  }
1111 }
1112 
1113 static void FUNC(weighted_pred_avg)(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
1114  int16_t ol0Flag, int16_t ol1Flag,
1115  uint8_t *_dst, ptrdiff_t _dststride,
1116  int16_t *src1, int16_t *src2, ptrdiff_t srcstride,
1117  int width, int height)
1118 {
1119  int shift;
1120  int log2Wd;
1121  int w0;
1122  int w1;
1123  int o0;
1124  int o1;
1125  int x , y;
1126  pixel *dst = (pixel*)_dst;
1127  ptrdiff_t dststride = _dststride / sizeof(pixel);
1128 
1129  shift = 14 - BIT_DEPTH;
1130  log2Wd = denom + shift;
1131  w0 = wl0Flag;
1132  w1 = wl1Flag;
1133  o0 = (ol0Flag) * (1 << (BIT_DEPTH - 8));
1134  o1 = (ol1Flag) * (1 << (BIT_DEPTH - 8));
1135 
1136  for (y = 0; y < height; y++) {
1137  for (x = 0; x < width; x++) {
1138  dst[x] = av_clip_pixel((src1[x] * w0 + src2[x] * w1 +
1139  ((o0 + o1 + 1) << log2Wd)) >> (log2Wd + 1));
1140  }
1141  dst += dststride;
1142  src1 += srcstride;
1143  src2 += srcstride;
1144  }
1145 }
1146 
/*
 * Sample accessors used by the deblocking filters.
 *
 * Every macro expands to an element of `pix` and relies on `pix`, `xstride`
 * and `ystride` being in scope where it is used. `xstride` steps from one
 * sample to the next along a line (P* at negative offsets, Q* from offset 0
 * upwards); `ystride` steps from one line to the next.
 */

// Line 0: the line pix currently points at.
#define P3 (pix[-4 * xstride])
#define P2 (pix[-3 * xstride])
#define P1 (pix[-2 * xstride])
#define P0 (pix[-xstride])
#define Q0 (pix[0])
#define Q1 (pix[xstride])
#define Q2 (pix[2 * xstride])
#define Q3 (pix[3 * xstride])

// Line 3: three lines after pix. Used only for the deblocking decision.
#define TP3 (pix[-4 * xstride + 3 * ystride])
#define TP2 (pix[-3 * xstride + 3 * ystride])
#define TP1 (pix[-2 * xstride + 3 * ystride])
#define TP0 (pix[-xstride + 3 * ystride])
#define TQ0 (pix[3 * ystride])
#define TQ1 (pix[xstride + 3 * ystride])
#define TQ2 (pix[2 * xstride + 3 * ystride])
#define TQ3 (pix[3 * xstride + 3 * ystride])
1166 
1167 static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, ptrdiff_t _xstride,
1168  ptrdiff_t _ystride, int *_beta, int *_tc,
1169  uint8_t *_no_p, uint8_t *_no_q)
1170 {
1171  int d, j;
1172  pixel *pix = (pixel*)_pix;
1173  ptrdiff_t xstride = _xstride / sizeof(pixel);
1174  ptrdiff_t ystride = _ystride / sizeof(pixel);
1175 
1176  for (j = 0; j < 2; j++) {
1177  const int dp0 = abs(P2 - 2 * P1 + P0);
1178  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1179  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1180  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1181  const int d0 = dp0 + dq0;
1182  const int d3 = dp3 + dq3;
1183  int beta = _beta[j] << (BIT_DEPTH - 8);
1184  const int tc = _tc[j] << (BIT_DEPTH - 8);
1185  const int no_p = _no_p[j];
1186  const int no_q = _no_q[j];
1187 
1188  if (d0 + d3 >= beta /*|| tc <= 0*/) {
1189  pix += 4 * ystride;
1190  continue;
1191  } else {
1192  const int beta_3 = beta >> 3;
1193  const int beta_2 = beta >> 2;
1194  const int tc25 = ((tc * 5 + 1) >> 1);
1195 
1196  if (abs( P3 - P0) + abs( Q3 - Q0) < beta_3 && abs( P0 - Q0) < tc25 &&
1197  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1198  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1199  // strong filtering
1200  const int tc2 = tc << 1;
1201  for (d = 0; d < 4; d++) {
1202  const int p3 = P3;
1203  const int p2 = P2;
1204  const int p1 = P1;
1205  const int p0 = P0;
1206  const int q0 = Q0;
1207  const int q1 = Q1;
1208  const int q2 = Q2;
1209  const int q3 = Q3;
1210  if (!no_p) {
1211  P0 = p0 + av_clip((( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3) - p0, -tc2, tc2);
1212  P1 = p1 + av_clip((( p2 + p1 + p0 + q0 + 2 ) >> 2) - p1, -tc2, tc2);
1213  P2 = p2 + av_clip((( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3) - p2, -tc2, tc2);
1214  }
1215  if (!no_q) {
1216  Q0 = q0 + av_clip((( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3) - q0, -tc2, tc2);
1217  Q1 = q1 + av_clip((( p0 + q0 + q1 + q2 + 2 ) >> 2) - q1, -tc2, tc2);
1218  Q2 = q2 + av_clip((( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3) - q2, -tc2, tc2);
1219  }
1220  pix += ystride;
1221  }
1222  } else { // normal filtering
1223  int nd_p = 1;
1224  int nd_q = 1;
1225  const int tc_2 = tc >> 1;
1226  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1227  nd_p = 2;
1228  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1229  nd_q = 2;
1230 
1231  for (d = 0; d < 4; d++) {
1232  const int p2 = P2;
1233  const int p1 = P1;
1234  const int p0 = P0;
1235  const int q0 = Q0;
1236  const int q1 = Q1;
1237  const int q2 = Q2;
1238  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1239  if (abs(delta0) < 10 * tc) {
1240  delta0 = av_clip(delta0, -tc, tc);
1241  if (!no_p)
1242  P0 = av_clip_pixel(p0 + delta0);
1243  if (!no_q)
1244  Q0 = av_clip_pixel(q0 - delta0);
1245  if (!no_p && nd_p > 1) {
1246  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1247  P1 = av_clip_pixel(p1 + deltap1);
1248  }
1249  if (!no_q && nd_q > 1) {
1250  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1251  Q1 = av_clip_pixel(q1 + deltaq1);
1252  }
1253  }
1254  pix += ystride;
1255  }
1256  }
1257  }
1258  }
1259 }
1260 
1261 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1262  ptrdiff_t _ystride, int *_tc,
1263  uint8_t *_no_p, uint8_t *_no_q)
1264 {
1265  int d, j;
1266  int no_p, no_q;
1267  pixel *pix = (pixel*)_pix;
1268  ptrdiff_t xstride = _xstride / sizeof(pixel);
1269  ptrdiff_t ystride = _ystride / sizeof(pixel);
1270 
1271  for (j = 0; j < 2; j++) {
1272  const int tc = _tc[j] << (BIT_DEPTH - 8);
1273  if (tc <= 0) {
1274  pix += 4 * ystride;
1275  continue;
1276  }
1277  no_p = _no_p[j];
1278  no_q = _no_q[j];
1279 
1280  for (d = 0; d < 4; d++) {
1281  int delta0;
1282  const int p1 = P1;
1283  const int p0 = P0;
1284  const int q0 = Q0;
1285  const int q1 = Q1;
1286  delta0 = av_clip((((q0 - p0) << 2) + p1 - q1 + 4) >> 3, -tc, tc);
1287  if (!no_p)
1288  P0 = av_clip_pixel(p0 + delta0);
1289  if (!no_q)
1290  Q0 = av_clip_pixel(q0 - delta0);
1291  pix += ystride;
1292  }
1293  }
1294 }
1295 
1296 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1297  int *tc, uint8_t *no_p, uint8_t *no_q)
1298 {
1299  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1300 }
1301 
1302 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1303  int *tc, uint8_t *no_p, uint8_t *no_q)
1304 {
1305  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1306 }
1307 
1308 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1309  int *beta, int *tc, uint8_t *no_p,
1310  uint8_t *no_q)
1311 {
1312  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), beta, tc, no_p, no_q);
1313 }
1314 
1315 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1316  int *beta, int *tc, uint8_t *no_p,
1317  uint8_t *no_q)
1318 {
1319  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, beta, tc, no_p, no_q);
1320 }
1321 
1322 #undef P3
1323 #undef P2
1324 #undef P1
1325 #undef P0
1326 #undef Q0
1327 #undef Q1
1328 #undef Q2
1329 #undef Q3
1330 
1331 #undef TP3
1332 #undef TP2
1333 #undef TP1
1334 #undef TP0
1335 #undef TQ0
1336 #undef TQ1
1337 #undef TQ2
1338 #undef TQ3