FFmpeg
cavsdsp.c
Go to the documentation of this file.
1 /*
2  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
3  *
4  * DSP functions
5  *
6  * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include <stdio.h>
26 
27 #include "idctdsp.h"
28 #include "mathops.h"
29 #include "cavsdsp.h"
30 #include "libavutil/common.h"
31 
32 /*****************************************************************************
33  *
34  * in-loop deblocking filter
35  *
36  ****************************************************************************/
37 
/*
 * Sample accessors for code that has a pointer `p0_p` and a step `stride`
 * in scope. P2, P1 and P0 name the samples three, two and one steps before
 * p0_p; Q0, Q1 and Q2 name the sample at p0_p and the ones one and two steps
 * after it. Each macro expands to an array element, so it can be read and
 * assigned.
 */
38 #define P2 p0_p[-3*stride]
39 #define P1 p0_p[-2*stride]
40 #define P0 p0_p[-1*stride]
41 #define Q0 p0_p[ 0*stride]
42 #define Q1 p0_p[ 1*stride]
43 #define Q2 p0_p[ 2*stride]
44 
/**
 * Filter one line of six samples lying across an edge, rewriting up to two
 * samples on each side.
 *
 * The samples before the edge are p0_p[-3*stride], p0_p[-2*stride] and
 * p0_p[-stride]; the samples after it are p0_p[0], p0_p[stride] and
 * p0_p[2*stride]. Nothing is written unless the step across the edge is
 * below alpha and the gradient next to the edge on both sides is below beta.
 * Each side is then handled separately: if the side is flat (outer sample
 * within beta of the edge sample) and the step is also below
 * (alpha >> 2) + 2, the two samples nearest the edge are rewritten;
 * otherwise only the sample adjacent to the edge is rewritten.
 *
 * @param p0_p   pointer to the first sample after the edge
 * @param stride distance between consecutive samples of the line
 * @param alpha  threshold for the step across the edge
 * @param beta   threshold for the gradients on either side
 */
static inline void loop_filter_l2(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta)
{
    const int left  = p0_p[-1 * stride];
    const int right = p0_p[0];
    const int step  = abs(left - right);
    int sum, tight;

    if (step >= alpha ||
        abs(p0_p[-2 * stride] - left)  >= beta ||
        abs(p0_p[ 1 * stride] - right) >= beta)
        return;

    sum   = left + right + 2;          /* +2 rounds the >> 2 below */
    tight = (alpha >> 2) + 2;

    if (abs(p0_p[-3 * stride] - left) < beta && step < tight) {
        const int near = p0_p[-2 * stride];

        p0_p[-1 * stride] = (near + left + sum) >> 2;
        p0_p[-2 * stride] = (2 * near + sum) >> 2;
    } else {
        p0_p[-1 * stride] = (2 * p0_p[-2 * stride] + sum) >> 2;
    }

    if (abs(p0_p[2 * stride] - right) < beta && step < tight) {
        const int near = p0_p[1 * stride];

        p0_p[0]          = (near + right + sum) >> 2;
        p0_p[1 * stride] = (2 * near + sum) >> 2;
    } else {
        p0_p[0] = (2 * p0_p[1 * stride] + sum) >> 2;
    }
}
65 
/**
 * Filter one line of six samples lying across an edge, limiting every
 * correction to the range [-tc, tc].
 *
 * The samples before the edge are p0_p[-3*stride], p0_p[-2*stride] and
 * p0_p[-stride]; the samples after it are p0_p[0], p0_p[stride] and
 * p0_p[2*stride]. Nothing is written unless the step across the edge is
 * below alpha and the gradient next to the edge on both sides is below beta.
 * The two samples adjacent to the edge are always corrected; the next sample
 * outwards on a side is corrected as well when that side is flat (outer
 * sample within beta of the original edge sample).
 *
 * @param p0_p   pointer to the first sample after the edge
 * @param stride distance between consecutive samples of the line
 * @param alpha  threshold for the step across the edge
 * @param beta   threshold for the gradients on either side
 * @param tc     maximum magnitude of a correction
 */
static inline void loop_filter_l1(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta, int tc)
{
    const int left  = p0_p[-1 * stride];
    const int right = p0_p[0];
    int new_left, new_right, delta;

    if (abs(left - right) >= alpha ||
        abs(p0_p[-2 * stride] - left)  >= beta ||
        abs(p0_p[ 1 * stride] - right) >= beta)
        return;

    delta = av_clip(((right - left) * 3 + p0_p[-2 * stride] - p0_p[1 * stride] + 4) >> 3,
                    -tc, tc);
    new_left  = av_clip_uint8(left  + delta);
    new_right = av_clip_uint8(right - delta);
    p0_p[-1 * stride] = new_left;
    p0_p[0]           = new_right;

    /* The flatness tests use the original edge samples, the corrections use
     * the already filtered ones. */
    if (abs(p0_p[-3 * stride] - left) < beta) {
        const int near = p0_p[-2 * stride];

        delta = av_clip(((new_left - near) * 3 + p0_p[-3 * stride] - new_right + 4) >> 3,
                        -tc, tc);
        p0_p[-2 * stride] = av_clip_uint8(near + delta);
    }
    if (abs(p0_p[2 * stride] - right) < beta) {
        const int near = p0_p[1 * stride];

        delta = av_clip(((near - new_right) * 3 + new_left - p0_p[2 * stride] + 4) >> 3,
                        -tc, tc);
        p0_p[1 * stride] = av_clip_uint8(near - delta);
    }
}
85 
/**
 * Filter one line of samples lying across an edge, rewriting only the two
 * samples adjacent to the edge.
 *
 * The samples before the edge are p0_p[-3*stride], p0_p[-2*stride] and
 * p0_p[-stride]; the samples after it are p0_p[0], p0_p[stride] and
 * p0_p[2*stride]. Nothing is written unless the step across the edge is
 * below alpha and the gradient next to the edge on both sides is below beta.
 * On each side the replacement value also takes the original edge sample
 * into account when the side is flat (outer sample within beta) and the step
 * is below (alpha >> 2) + 2; otherwise it is derived from the neighbouring
 * sample only.
 *
 * @param p0_p   pointer to the first sample after the edge
 * @param stride distance between consecutive samples of the line
 * @param alpha  threshold for the step across the edge
 * @param beta   threshold for the gradients on either side
 */
static inline void loop_filter_c2(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta)
{
    const int left  = p0_p[-1 * stride];
    const int right = p0_p[0];
    const int step  = abs(left - right);
    int sum, tight;

    if (step >= alpha ||
        abs(p0_p[-2 * stride] - left)  >= beta ||
        abs(p0_p[ 1 * stride] - right) >= beta)
        return;

    sum   = left + right + 2;          /* +2 rounds the >> 2 below */
    tight = (alpha >> 2) + 2;

    if (abs(p0_p[-3 * stride] - left) < beta && step < tight)
        p0_p[-1 * stride] = (p0_p[-2 * stride] + left + sum) >> 2;
    else
        p0_p[-1 * stride] = (2 * p0_p[-2 * stride] + sum) >> 2;

    if (abs(p0_p[2 * stride] - right) < beta && step < tight)
        p0_p[0] = (p0_p[1 * stride] + right + sum) >> 2;
    else
        p0_p[0] = (2 * p0_p[1 * stride] + sum) >> 2;
}
104 
/**
 * Filter the two samples adjacent to an edge on one line, limiting the
 * correction to the range [-tc, tc].
 *
 * Reads p0_p[-2*stride] .. p0_p[stride] and writes p0_p[-stride] and p0_p[0].
 * Nothing is written unless the step across the edge is below alpha and the
 * gradient next to the edge on both sides is below beta.
 *
 * @param p0_p   pointer to the first sample after the edge
 * @param stride distance between consecutive samples of the line
 * @param alpha  threshold for the step across the edge
 * @param beta   threshold for the gradients on either side
 * @param tc     maximum magnitude of the correction
 */
static inline void loop_filter_c1(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta,
                                  int tc)
{
    const int left  = p0_p[-1 * stride];
    const int right = p0_p[0];
    int delta;

    if (abs(left - right) >= alpha ||
        abs(p0_p[-2 * stride] - left)  >= beta ||
        abs(p0_p[ 1 * stride] - right) >= beta)
        return;

    delta = av_clip(((right - left) * 3 + p0_p[-2 * stride] - p0_p[1 * stride] + 4) >> 3,
                    -tc, tc);
    p0_p[-1 * stride] = av_clip_uint8(left  + delta);
    p0_p[0]           = av_clip_uint8(right - delta);
}
114 
/* Retire the sample accessor macros so the short names P0..P2 and Q0..Q2
 * do not leak into the rest of the file. */
115 #undef P0
116 #undef P1
117 #undef P2
118 #undef Q0
119 #undef Q1
120 #undef Q2
121 
/**
 * Run the 16-line edge filter with neighbouring samples one byte apart.
 *
 * Line i starts at d + i*stride and is filtered across the position d points
 * to (presumably a vertical luma edge, going by the function name). When bs1
 * is 2 all 16 lines go through loop_filter_l2() and bs2 is ignored.
 * Otherwise lines 0-7 go through loop_filter_l1() if bs1 is non-zero and
 * lines 8-15 if bs2 is non-zero.
 *
 * @param d      first sample after the edge on line 0
 * @param stride distance between consecutive lines
 * @param alpha  step threshold passed on to the line filters
 * @param beta   gradient threshold passed on to the line filters
 * @param tc     correction limit passed on to loop_filter_l1()
 * @param bs1    strength selector for lines 0-7 (2 selects loop_filter_l2()
 *               for all lines)
 * @param bs2    strength selector for lines 8-15
 */
static void cavs_filter_lv_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc,
                             int bs1, int bs2)
{
    int line;

    if (bs1 == 2) {
        for (line = 0; line < 16; line++)
            loop_filter_l2(d + line * stride, 1, alpha, beta);
        return;
    }

    for (line = 0; line < 16; line++) {
        /* first half is governed by bs1, second half by bs2 */
        if (line < 8 ? bs1 : bs2)
            loop_filter_l1(d + line * stride, 1, alpha, beta, tc);
    }
}
138 
/**
 * Run the 16-line edge filter with neighbouring samples `stride` bytes apart.
 *
 * Line i starts at d + i and is filtered across the position d points to
 * (presumably a horizontal luma edge, going by the function name). When bs1
 * is 2 all 16 lines go through loop_filter_l2() and bs2 is ignored.
 * Otherwise lines 0-7 go through loop_filter_l1() if bs1 is non-zero and
 * lines 8-15 if bs2 is non-zero.
 *
 * @param d      first sample after the edge on line 0
 * @param stride distance between consecutive samples of one line
 * @param alpha  step threshold passed on to the line filters
 * @param beta   gradient threshold passed on to the line filters
 * @param tc     correction limit passed on to loop_filter_l1()
 * @param bs1    strength selector for lines 0-7 (2 selects loop_filter_l2()
 *               for all lines)
 * @param bs2    strength selector for lines 8-15
 */
static void cavs_filter_lh_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc,
                             int bs1, int bs2)
{
    int line;

    if (bs1 == 2) {
        for (line = 0; line < 16; line++)
            loop_filter_l2(d + line, stride, alpha, beta);
        return;
    }

    for (line = 0; line < 16; line++) {
        /* first half is governed by bs1, second half by bs2 */
        if (line < 8 ? bs1 : bs2)
            loop_filter_l1(d + line, stride, alpha, beta, tc);
    }
}
155 
/**
 * Run the 8-line edge filter with neighbouring samples one byte apart.
 *
 * Line i starts at d + i*stride and is filtered across the position d points
 * to (presumably a vertical chroma edge, going by the function name). When
 * bs1 is 2 all 8 lines go through loop_filter_c2() and bs2 is ignored.
 * Otherwise lines 0-3 go through loop_filter_c1() if bs1 is non-zero and
 * lines 4-7 if bs2 is non-zero.
 *
 * @param d      first sample after the edge on line 0
 * @param stride distance between consecutive lines
 * @param alpha  step threshold passed on to the line filters
 * @param beta   gradient threshold passed on to the line filters
 * @param tc     correction limit passed on to loop_filter_c1()
 * @param bs1    strength selector for lines 0-3 (2 selects loop_filter_c2()
 *               for all lines)
 * @param bs2    strength selector for lines 4-7
 */
static void cavs_filter_cv_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc,
                             int bs1, int bs2)
{
    int line;

    if (bs1 == 2) {
        for (line = 0; line < 8; line++)
            loop_filter_c2(d + line * stride, 1, alpha, beta);
        return;
    }

    for (line = 0; line < 8; line++) {
        /* first half is governed by bs1, second half by bs2 */
        if (line < 4 ? bs1 : bs2)
            loop_filter_c1(d + line * stride, 1, alpha, beta, tc);
    }
}
172 
/**
 * Run the 8-line edge filter with neighbouring samples `stride` bytes apart.
 *
 * Line i starts at d + i and is filtered across the position d points to
 * (presumably a horizontal chroma edge, going by the function name). When
 * bs1 is 2 all 8 lines go through loop_filter_c2() and bs2 is ignored.
 * Otherwise lines 0-3 go through loop_filter_c1() if bs1 is non-zero and
 * lines 4-7 if bs2 is non-zero.
 *
 * @param d      first sample after the edge on line 0
 * @param stride distance between consecutive samples of one line
 * @param alpha  step threshold passed on to the line filters
 * @param beta   gradient threshold passed on to the line filters
 * @param tc     correction limit passed on to loop_filter_c1()
 * @param bs1    strength selector for lines 0-3 (2 selects loop_filter_c2()
 *               for all lines)
 * @param bs2    strength selector for lines 4-7
 */
static void cavs_filter_ch_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc,
                             int bs1, int bs2)
{
    int line;

    if (bs1 == 2) {
        for (line = 0; line < 8; line++)
            loop_filter_c2(d + line, stride, alpha, beta);
        return;
    }

    for (line = 0; line < 8; line++) {
        /* first half is governed by bs1, second half by bs2 */
        if (line < 4 ? bs1 : bs2)
            loop_filter_c1(d + line, stride, alpha, beta, tc);
    }
}
189 
190 /*****************************************************************************
191  *
192  * inverse transform
193  *
194  ****************************************************************************/
195 
/**
 * 8x8 inverse transform of a coefficient block, added to the destination.
 *
 * The block is viewed as 8 rows of 8 coefficients. The first pass transforms
 * every row in place (results rounded and scaled down by 3 bits), the second
 * pass transforms every column; the column results are scaled down by 7 bits,
 * added to the destination samples and clipped to 0..255.
 *
 * All scaling by small constants is written as multiplication instead of a
 * left shift: the operands can be negative, and left-shifting a negative
 * signed value is undefined behaviour in C. For every value that does not
 * overflow, the multiplication yields the same result as the shift did.
 *
 * @param dst    top-left sample of the 8x8 destination area, updated in place
 * @param block  64 coefficients; overwritten with intermediate results
 * @param stride distance between consecutive destination rows
 */
static void cavs_idct8_add_c(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    int i;
    int16_t (*src)[8] = (int16_t(*)[8])block;

    /* Bias the DC coefficient: the +8 comes out of the row pass as +8 on
     * every sample of row 0 and turns into +64 in the column pass, i.e. the
     * rounding term for the final >> 7. */
    src[0][0] += 8;

    /* first pass: rows, results written back into the block */
    for (i = 0; i < 8; i++) {
        /* odd part */
        const int a0 = 3 * src[i][1] - 2 * src[i][7];
        const int a1 = 3 * src[i][3] + 2 * src[i][5];
        const int a2 = 2 * src[i][3] - 3 * src[i][5];
        const int a3 = 2 * src[i][1] + 3 * src[i][7];

        const int b4 = 2 * (a0 + a1 + a3) + a1;
        const int b5 = 2 * (a0 - a1 + a2) + a0;
        const int b6 = 2 * (a3 - a2 - a1) + a3;
        const int b7 = 2 * (a0 - a2 - a3) - a2;

        /* even part; the +4 rounds the >> 3 below */
        const int a7 = 4 * src[i][2] - 10 * src[i][6];
        const int a6 = 4 * src[i][6] + 10 * src[i][2];
        const int a5 = 8 * (src[i][0] - src[i][4]) + 4;
        const int a4 = 8 * (src[i][0] + src[i][4]) + 4;

        const int b0 = a4 + a6;
        const int b1 = a5 + a7;
        const int b2 = a5 - a7;
        const int b3 = a4 - a6;

        src[i][0] = (b0 + b4) >> 3;
        src[i][1] = (b1 + b5) >> 3;
        src[i][2] = (b2 + b6) >> 3;
        src[i][3] = (b3 + b7) >> 3;
        src[i][4] = (b3 - b7) >> 3;
        src[i][5] = (b2 - b6) >> 3;
        src[i][6] = (b1 - b5) >> 3;
        src[i][7] = (b0 - b4) >> 3;
    }

    /* second pass: columns, results added to the destination */
    for (i = 0; i < 8; i++) {
        /* odd part */
        const int a0 = 3 * src[1][i] - 2 * src[7][i];
        const int a1 = 3 * src[3][i] + 2 * src[5][i];
        const int a2 = 2 * src[3][i] - 3 * src[5][i];
        const int a3 = 2 * src[1][i] + 3 * src[7][i];

        const int b4 = 2 * (a0 + a1 + a3) + a1;
        const int b5 = 2 * (a0 - a1 + a2) + a0;
        const int b6 = 2 * (a3 - a2 - a1) + a3;
        const int b7 = 2 * (a0 - a2 - a3) - a2;

        /* even part; rounding is already contained in the biased DC term */
        const int a7 = 4 * src[2][i] - 10 * src[6][i];
        const int a6 = 4 * src[6][i] + 10 * src[2][i];
        const int a5 = 8 * (src[0][i] - src[4][i]);
        const int a4 = 8 * (src[0][i] + src[4][i]);

        const int b0 = a4 + a6;
        const int b1 = a5 + a7;
        const int b2 = a5 - a7;
        const int b3 = a4 - a6;

        dst[i + 0*stride] = av_clip_uint8(dst[i + 0*stride] + ((b0 + b4) >> 7));
        dst[i + 1*stride] = av_clip_uint8(dst[i + 1*stride] + ((b1 + b5) >> 7));
        dst[i + 2*stride] = av_clip_uint8(dst[i + 2*stride] + ((b2 + b6) >> 7));
        dst[i + 3*stride] = av_clip_uint8(dst[i + 3*stride] + ((b3 + b7) >> 7));
        dst[i + 4*stride] = av_clip_uint8(dst[i + 4*stride] + ((b3 - b7) >> 7));
        dst[i + 5*stride] = av_clip_uint8(dst[i + 5*stride] + ((b2 - b6) >> 7));
        dst[i + 6*stride] = av_clip_uint8(dst[i + 6*stride] + ((b1 - b5) >> 7));
        dst[i + 7*stride] = av_clip_uint8(dst[i + 7*stride] + ((b0 - b4) >> 7));
    }
}
264 
265 /*****************************************************************************
266  *
267  * motion compensation
268  *
269  ****************************************************************************/
270 
/**
 * Generate the one-dimensional 6-tap interpolation functions for one filter:
 *   OPNAME cavs_filt8_h_NAME,  OPNAME cavs_filt8_v_NAME  (8x8 block)
 *   OPNAME cavs_filt16_h_NAME, OPNAME cavs_filt16_v_NAME (16x16 block, built
 *   from four 8x8 calls)
 *
 * For every output sample the six taps A..F are applied to the source
 * samples at offsets -2..+3 along the filter direction, and the weighted sum
 * is handed to OP together with the destination sample. OP is expected to use
 * the local clipping table pointer `cm`.
 *
 * @param OPNAME prefix of the generated function names
 * @param OP     macro taking (destination lvalue, weighted sum)
 * @param NAME   suffix of the generated function names
 * @param A..F   filter taps for source offsets -2, -1, 0, +1, +2, +3
 */
#define CAVS_SUBPIX(OPNAME, OP, NAME, A, B, C, D, E, F) \
static void OPNAME ## cavs_filt8_h_ ## NAME(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (y = 0; y < 8; y++) {\
        for (x = 0; x < 8; x++)\
            OP(dst[x], A*src[x-2] + B*src[x-1] + C*src[x] + D*src[x+1] + E*src[x+2] + F*src[x+3]);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## cavs_filt8_v_ ## NAME(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
    for (x = 0; x < 8; x++) {\
        /* source rows -2..10 of this column, fetched before any output is written */\
        int col[8 + 5];\
        for (y = 0; y < 8 + 5; y++)\
            col[y] = src[(y - 2) * srcStride + x];\
        for (y = 0; y < 8; y++)\
            OP(dst[y * dstStride + x], A*col[y] + B*col[y+1] + C*col[y+2] + D*col[y+3] + E*col[y+4] + F*col[y+5]);\
    }\
}\
\
static void OPNAME ## cavs_filt16_v_ ## NAME(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrant */\
    OPNAME ## cavs_filt8_v_ ## NAME(dst,                     src,                     dstStride, srcStride);\
    OPNAME ## cavs_filt8_v_ ## NAME(dst + 8,                 src + 8,                 dstStride, srcStride);\
    OPNAME ## cavs_filt8_v_ ## NAME(dst + 8 * dstStride,     src + 8 * srcStride,     dstStride, srcStride);\
    OPNAME ## cavs_filt8_v_ ## NAME(dst + 8 * dstStride + 8, src + 8 * srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## cavs_filt16_h_ ## NAME(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    /* top-left, top-right, bottom-left, bottom-right 8x8 quadrant */\
    OPNAME ## cavs_filt8_h_ ## NAME(dst,                     src,                     dstStride, srcStride);\
    OPNAME ## cavs_filt8_h_ ## NAME(dst + 8,                 src + 8,                 dstStride, srcStride);\
    OPNAME ## cavs_filt8_h_ ## NAME(dst + 8 * dstStride,     src + 8 * srcStride,     dstStride, srcStride);\
    OPNAME ## cavs_filt8_h_ ## NAME(dst + 8 * dstStride + 8, src + 8 * srcStride + 8, dstStride, srcStride);\
}
344 
/**
 * Generate the two-pass (horizontal, then vertical) interpolation functions
 * OPNAME cavs_filt8_hv_NAME and OPNAME cavs_filt16_hv_NAME.
 *
 * Pass 1 applies the taps AH..FH horizontally to 13 source rows (2 above the
 * block through 3 below it) and keeps the unscaled sums in a 16-bit
 * temporary. Pass 2 applies the taps AV..FV vertically to that temporary.
 * When FULL is non-zero, 64 times the co-located sample of src2 is added to
 * the sum; otherwise src2 is never dereferenced. The sum is handed to OP
 * together with the destination sample; OP is expected to use the local
 * clipping table pointer `cm`.
 *
 * @param OPNAME  prefix of the generated function names
 * @param OP      macro taking (destination lvalue, weighted sum)
 * @param NAME    suffix of the generated function names
 * @param AH..FH  horizontal taps for source offsets -2..+3
 * @param AV..FV  vertical taps for row offsets -2..+3
 * @param FULL    non-zero to add the 64*src2 term
 */
#define CAVS_SUBPIX_HV(OPNAME, OP, NAME, AH, BH, CH, DH, EH, FH, AV, BV, CV, DV, EV, FV, FULL) \
static void OPNAME ## cavs_filt8_hv_ ## NAME(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    int16_t temp[8 * (8 + 5)];\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int x, y;\
\
    /* pass 1: horizontal filter over rows -2..10 */\
    src1 -= 2 * srcStride;\
    for (y = 0; y < 8 + 5; y++) {\
        for (x = 0; x < 8; x++)\
            temp[8 * y + x] = AH*src1[x-2] + BH*src1[x-1] + CH*src1[x] + DH*src1[x+1] + EH*src1[x+2] + FH*src1[x+3];\
        src1 += srcStride;\
    }\
\
    /* pass 2: vertical filter, column by column; col points at row 0 */\
    for (x = 0; x < 8; x++) {\
        const int16_t *col = temp + 8 * 2 + x;\
        for (y = 0; y < 8; y++) {\
            int acc = AV*col[8*(y-2)] + BV*col[8*(y-1)] + CV*col[8*y] + DV*col[8*(y+1)] + EV*col[8*(y+2)] + FV*col[8*(y+3)];\
            if (FULL)\
                acc += 64 * src2[y * srcStride + x];\
            OP(dst[y * dstStride + x], acc);\
        }\
    }\
}\
\
static void OPNAME ## cavs_filt16_hv_ ## NAME(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    /* top-left and top-right 8x8 quadrant */\
    OPNAME ## cavs_filt8_hv_ ## NAME(dst,     src1,     src2,     dstStride, srcStride);\
    OPNAME ## cavs_filt8_hv_ ## NAME(dst + 8, src1 + 8, src2 + 8, dstStride, srcStride);\
    /* bottom-left and bottom-right 8x8 quadrant */\
    dst  += 8 * dstStride;\
    src1 += 8 * srcStride;\
    src2 += 8 * srcStride;\
    OPNAME ## cavs_filt8_hv_ ## NAME(dst,     src1,     src2,     dstStride, srcStride);\
    OPNAME ## cavs_filt8_hv_ ## NAME(dst + 8, src1 + 8, src2 + 8, dstStride, srcStride);\
}
438 
/**
 * Generate the motion compensation entry points
 * OPNAME cavs_qpel SIZE _mcXY_c for every XY from 01 to 33 (mc00 is not
 * generated here).
 *
 * Each entry point is a thin wrapper that forwards to one of the generated
 * interpolation functions, using the same stride for source and destination:
 * mcX0 use the horizontal filters, mc0Y the vertical filters and all others
 * the two-pass hv filters. The hv wrappers differ in the filter they select
 * and in the second source pointer they pass.
 *
 * @param OPNAME prefix of the generated function names
 * @param SIZE   block size used in the function names (8 or 16 in this file)
 */
#define CAVS_MC(OPNAME, SIZE) \
/* horizontal only */\
static void OPNAME ## cavs_qpel ## SIZE ## _mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _h_qpel_l(dst, src, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _h_hpel(dst, src, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _h_qpel_r(dst, src, stride, stride); }\
\
/* vertical only */\
static void OPNAME ## cavs_qpel ## SIZE ## _mc01_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _v_qpel_l(dst, src, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _v_hpel(dst, src, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc03_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _v_qpel_r(dst, src, stride, stride); }\
\
/* two-pass, centre position: no second source */\
static void OPNAME ## cavs_qpel ## SIZE ## _mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_jj(dst, src, NULL, stride, stride); }\
\
/* two-pass "egpr" filter; the second source selects one of four neighbours */\
static void OPNAME ## cavs_qpel ## SIZE ## _mc11_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc13_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src + stride, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc31_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src + 1, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src + stride + 1, stride, stride); }\
\
/* two-pass, remaining positions */\
static void OPNAME ## cavs_qpel ## SIZE ## _mc21_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_ff(dst, src, src + stride + 1, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_ii(dst, src, src + stride + 1, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_kk(dst, src, src + stride + 1, stride, stride); }\
static void OPNAME ## cavs_qpel ## SIZE ## _mc23_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{ OPNAME ## cavs_filt ## SIZE ## _hv_qq(dst, src, src + stride + 1, stride, stride); }
514 
515 #define op_put1(a, b) a = cm[((b)+4)>>3]
516 #define op_put2(a, b) a = cm[((b)+64)>>7]
517 #define op_put3(a, b) a = cm[((b)+32)>>6]
518 #define op_put4(a, b) a = cm[((b)+512)>>10]
519 #define op_avg1(a, b) a = ((a)+cm[((b)+4)>>3] +1)>>1
520 #define op_avg2(a, b) a = ((a)+cm[((b)+64)>>7] +1)>>1
521 #define op_avg3(a, b) a = ((a)+cm[((b)+32)>>6] +1)>>1
522 #define op_avg4(a, b) a = ((a)+cm[((b)+512)>>10]+1)>>1
523 CAVS_SUBPIX(put_ , op_put1, hpel, 0, -1, 5, 5, -1, 0)
524 CAVS_SUBPIX(put_ , op_put2, qpel_l, -1, -2, 96, 42, -7, 0)
525 CAVS_SUBPIX(put_ , op_put2, qpel_r, 0, -7, 42, 96, -2, -1)
526 CAVS_SUBPIX_HV(put_, op_put3, jj, 0, -1, 5, 5, -1, 0, 0, -1, 5, 5, -1, 0, 0)
527 CAVS_SUBPIX_HV(put_, op_put4, ff, 0, -1, 5, 5, -1, 0, -1, -2, 96, 42, -7, 0, 0)
528 CAVS_SUBPIX_HV(put_, op_put4, ii, -1, -2, 96, 42, -7, 0, 0, -1, 5, 5, -1, 0, 0)
529 CAVS_SUBPIX_HV(put_, op_put4, kk, 0, -7, 42, 96, -2, -1, 0, -1, 5, 5, -1, 0, 0)
530 CAVS_SUBPIX_HV(put_, op_put4, qq, 0, -1, 5, 5, -1, 0, 0, -7, 42, 96, -2,-1, 0)
531 CAVS_SUBPIX_HV(put_, op_put2, egpr, 0, -1, 5, 5, -1, 0, 0, -1, 5, 5, -1, 0, 1)
532 CAVS_SUBPIX(avg_ , op_avg1, hpel, 0, -1, 5, 5, -1, 0)
533 CAVS_SUBPIX(avg_ , op_avg2, qpel_l, -1, -2, 96, 42, -7, 0)
534 CAVS_SUBPIX(avg_ , op_avg2, qpel_r, 0, -7, 42, 96, -2, -1)
535 CAVS_SUBPIX_HV(avg_, op_avg3, jj, 0, -1, 5, 5, -1, 0, 0, -1, 5, 5, -1, 0, 0)
536 CAVS_SUBPIX_HV(avg_, op_avg4, ff, 0, -1, 5, 5, -1, 0, -1, -2, 96, 42, -7, 0, 0)
537 CAVS_SUBPIX_HV(avg_, op_avg4, ii, -1, -2, 96, 42, -7, 0, 0, -1, 5, 5, -1, 0, 0)
538 CAVS_SUBPIX_HV(avg_, op_avg4, kk, 0, -7, 42, 96, -2, -1, 0, -1, 5, 5, -1, 0, 0)
539 CAVS_SUBPIX_HV(avg_, op_avg4, qq, 0, -1, 5, 5, -1, 0, 0, -7, 42, 96, -2,-1, 0)
540 CAVS_SUBPIX_HV(avg_, op_avg2, egpr, 0, -1, 5, 5, -1, 0, 0, -1, 5, 5, -1, 0, 1)
541 CAVS_MC(put_, 8)
542 CAVS_MC(put_, 16)
543 CAVS_MC(avg_, 8)
544 CAVS_MC(avg_, 16)
545 
546 #define put_cavs_qpel8_mc00_c ff_put_pixels8x8_c
547 #define avg_cavs_qpel8_mc00_c ff_avg_pixels8x8_c
548 #define put_cavs_qpel16_mc00_c ff_put_pixels16x16_c
549 #define avg_cavs_qpel16_mc00_c ff_avg_pixels16x16_c
550 
552 #define dspfunc(PFX, IDX, NUM) \
553  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
554  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
555  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
556  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
557  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
558  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
559  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
560  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
561  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
562  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
563  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
564  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
565  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
566  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
567  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
568  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
569  dspfunc(put_cavs_qpel, 0, 16);
570  dspfunc(put_cavs_qpel, 1, 8);
571  dspfunc(avg_cavs_qpel, 0, 16);
572  dspfunc(avg_cavs_qpel, 1, 8);
579 
580  if (ARCH_X86)
581  ff_cavsdsp_init_x86(c, avctx);
582 }
static void loop_filter_c1(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta, int tc)
Definition: cavsdsp.c:105
#define P1
Definition: cavsdsp.c:39
#define op_put4(a, b)
Definition: cavsdsp.c:518
#define a0
Definition: regdef.h:46
static void cavs_filter_lv_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.c:122
static void cavs_idct8_add_c(uint8_t *dst, int16_t *block, ptrdiff_t stride)
Definition: cavsdsp.c:196
#define tc
Definition: regdef.h:69
#define a1
Definition: regdef.h:47
#define op_put1(a, b)
Definition: cavsdsp.c:515
#define src
Definition: vp8dsp.c:254
static void cavs_filter_ch_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.c:173
#define Q0
Definition: cavsdsp.c:41
#define a3
Definition: regdef.h:49
void(* cavs_idct8_add)(uint8_t *dst, int16_t *block, ptrdiff_t stride)
Definition: cavsdsp.h:37
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
#define av_cold
Definition: attributes.h:82
float delta
void(* cavs_filter_cv)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.h:35
static void cavs_filter_cv_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.c:156
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
av_cold void ff_cavsdsp_init(CAVSDSPContext *c, AVCodecContext *avctx)
Definition: cavsdsp.c:551
void(* cavs_filter_ch)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.h:36
#define op_avg1(a, b)
Definition: cavsdsp.c:519
static void loop_filter_l1(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta, int tc)
Definition: cavsdsp.c:66
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
static const uint8_t q0[256]
Definition: twofish.c:77
#define op_avg2(a, b)
Definition: cavsdsp.c:520
static void loop_filter_c2(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta)
Definition: cavsdsp.c:86
#define CAVS_MC(OPNAME, SIZE)
Definition: cavsdsp.c:439
#define Q1
Definition: cavsdsp.c:42
#define a2
Definition: regdef.h:48
#define s(width, name)
Definition: cbs_vp9.c:257
static void loop_filter_l2(uint8_t *p0_p, ptrdiff_t stride, int alpha, int beta)
Definition: cavsdsp.c:45
#define a5
Definition: regdef.h:51
#define dspfunc(PFX, IDX, NUM)
#define abs(x)
Definition: cuda_runtime.h:35
static const int16_t alpha[]
Definition: ilbcdata.h:55
main external API structure.
Definition: avcodec.h:1568
void(* cavs_filter_lv)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.h:33
int idct_perm
Definition: cavsdsp.h:38
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
Definition: cavsdsp.c:429
static void cavs_filter_lh_c(uint8_t *d, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.c:139
#define P0
Definition: cavsdsp.c:40
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
common internal and external API header
#define CAVS_SUBPIX_HV(OPNAME, OP, NAME, AH, BH, CH, DH, EH, FH, AV, BV, CV, DV, EV, FV, FULL)
Definition: cavsdsp.c:345
#define Q2
Definition: cavsdsp.c:43
#define a4
Definition: regdef.h:50
#define op_avg4(a, b)
Definition: cavsdsp.c:522
#define P2
Definition: cavsdsp.c:38
#define CAVS_SUBPIX(OPNAME, OP, NAME, A, B, C, D, E, F)
Definition: cavsdsp.c:271
#define op_avg3(a, b)
Definition: cavsdsp.c:521
void(* cavs_filter_lh)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int tc, int bs1, int bs2)
Definition: cavsdsp.h:34
#define op_put2(a, b)
Definition: cavsdsp.c:516
#define op_put3(a, b)
Definition: cavsdsp.c:517
#define stride