FFmpeg
rv40dsp.c
Go to the documentation of this file.
1 /*
2  * RV40 decoder motion compensation functions
3  * Copyright (c) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * RV40 decoder motion compensation functions
25  */
26 
27 #include "libavutil/common.h"
28 #include "libavutil/intreadwrite.h"
29 #include "avcodec.h"
30 #include "h264qpel.h"
31 #include "mathops.h"
32 #include "pixels.h"
33 #include "rnd_avg.h"
34 #include "rv34dsp.h"
35 #include "libavutil/avassert.h"
36 
/**
 * Generate the RV40 six-tap low-pass interpolation kernels.
 *
 * Every output sample is
 *   (s[-2] + s[3] - 5 * (s[-1] + s[2]) + C1 * s[0] + C2 * s[1]
 *    + (1 << (SHIFT - 1))) >> SHIFT
 * taken along the filtered direction and handed to OP(dst, value), which
 * performs the store. `cm` is declared in each function for OP's use.
 *
 * OPNAME is pasted in front of the generated names. Four functions result:
 * 8-pixel horizontal/vertical kernels, and 16-pixel versions assembled from
 * four calls to the 8-pixel ones.
 */
#define RV40_LOWPASS(OPNAME, OP) \
static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                           const int h, const int C1, const int C2, const int SHIFT)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    const int rnd = 1 << (SHIFT - 1);\
    int row, x;\
\
    for (row = 0; row < h; row++) {\
        /* eight outputs per row, each reading src[x - 2] .. src[x + 3] */\
        for (x = 0; x < 8; x++)\
            OP(dst[x], (src[x - 2] + src[x + 3] - 5 * (src[x - 1] + src[x + 2]) +\
                        src[x] * C1 + src[x + 1] * C2 + rnd) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT)\
{\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    const int rnd = 1 << (SHIFT - 1);\
    int col, k;\
\
    for (col = 0; col < w; col++) {\
        /* tap[k] is the sample (k - 2) rows from the first output row; */\
        /* the whole column is loaded before anything is stored        */\
        int tap[13];\
        for (k = 0; k < 13; k++)\
            tap[k] = src[(k - 2) * srcStride];\
        for (k = 0; k < 8; k++)\
            OP(dst[k * dstStride], (tap[k] + tap[k + 5] - 5 * (tap[k + 1] + tap[k + 4]) +\
                                    tap[k + 2] * C1 + tap[k + 3] * C2 + rnd) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT)\
{\
    uint8_t       *dst_low = dst + 8 * dstStride;\
    const uint8_t *src_low = src + 8 * srcStride;\
\
    /* upper two 8-column halves, then the lower two with the remaining width */\
    OPNAME ## rv40_qpel8_v_lowpass(dst,         src,         dstStride, srcStride, 8,     C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst + 8,     src + 8,     dstStride, srcStride, 8,     C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst_low,     src_low,     dstStride, srcStride, w - 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst_low + 8, src_low + 8, dstStride, srcStride, w - 8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT)\
{\
    uint8_t       *dst_low = dst + 8 * dstStride;\
    const uint8_t *src_low = src + 8 * srcStride;\
\
    /* first eight rows (left and right half), then the remaining h - 8 rows */\
    OPNAME ## rv40_qpel8_h_lowpass(dst,         src,         dstStride, srcStride, 8,     C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst + 8,     src + 8,     dstStride, srcStride, 8,     C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst_low,     src_low,     dstStride, srcStride, h - 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst_low + 8, src_low + 8, dstStride, srcStride, h - 8, C1, C2, SHIFT);\
}
109 
/**
 * Helper for RV40_MC: one motion compensation function that filters in a
 * single direction. DIR is `h` or `v` and selects the low-pass kernel; XY is
 * the two-digit suffix of the generated function name.
 */
#define RV40_MC_1D(OPNAME, SIZE, XY, DIR, C1, C2, SHIFT) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc ## XY ## _c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _ ## DIR ## _lowpass(dst, src, stride, stride, SIZE, C1, C2, SHIFT);\
}

/**
 * Helper for RV40_MC: one motion compensation function that filters in both
 * directions. The horizontal pass always uses the put_ kernel and writes
 * SIZE + 5 rows (starting two rows above src) into a temporary buffer; the
 * vertical pass then reads that buffer starting at its third row and stores
 * into dst with the OPNAME kernel.
 */
#define RV40_MC_2D(OPNAME, SIZE, XY, HC1, HC2, HSHIFT, VC1, VC2, VSHIFT) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc ## XY ## _c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t tmp[SIZE * (SIZE + 5)];\
    uint8_t * const tmp_row0 = tmp + SIZE * 2;\
\
    put_rv40_qpel ## SIZE ## _h_lowpass(tmp, src - 2 * stride, SIZE, stride, SIZE + 5, HC1, HC2, HSHIFT);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, tmp_row0, stride, SIZE, SIZE, VC1, VC2, VSHIFT);\
}

/**
 * Generate the OPNAME rv40_qpel<SIZE>_mcXY_c functions for
 * XY in {10, 30, 01, 11, 21, 31, 12, 22, 32, 03, 13, 23}.
 *
 * The coefficient triples (C1, C2, SHIFT) used are (52, 20, 6), (20, 20, 5)
 * and (20, 52, 6); the first digit of XY picks the horizontal triple and the
 * second digit the vertical one.
 */
#define RV40_MC(OPNAME, SIZE) \
RV40_MC_1D(OPNAME, SIZE, 10, h, 52, 20, 6)\
RV40_MC_1D(OPNAME, SIZE, 30, h, 20, 52, 6)\
RV40_MC_1D(OPNAME, SIZE, 01, v, 52, 20, 6)\
RV40_MC_2D(OPNAME, SIZE, 11, 52, 20, 6, 52, 20, 6)\
RV40_MC_2D(OPNAME, SIZE, 21, 20, 20, 5, 52, 20, 6)\
RV40_MC_2D(OPNAME, SIZE, 31, 20, 52, 6, 52, 20, 6)\
RV40_MC_2D(OPNAME, SIZE, 12, 52, 20, 6, 20, 20, 5)\
RV40_MC_2D(OPNAME, SIZE, 22, 20, 20, 5, 20, 20, 5)\
RV40_MC_2D(OPNAME, SIZE, 32, 20, 52, 6, 20, 20, 5)\
RV40_MC_1D(OPNAME, SIZE, 03, v, 20, 52, 6)\
RV40_MC_2D(OPNAME, SIZE, 13, 52, 20, 6, 20, 52, 6)\
RV40_MC_2D(OPNAME, SIZE, 23, 20, 20, 5, 20, 52, 6)
195 
196 #define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
197 #define op_put(a, b) a = cm[b]
198 
199 RV40_LOWPASS(put_ , op_put)
200 RV40_LOWPASS(avg_ , op_avg)
201 
202 #undef op_avg
203 #undef op_put
204 
205 RV40_MC(put_, 8)
206 RV40_MC(put_, 16)
207 RV40_MC(avg_, 8)
208 RV40_MC(avg_, 16)
209 
/**
 * Generate OPNAME_pixels8_xy2_8_c and, through CALL_2X_PIXELS, the 16-pixel
 * wide OPNAME_pixels16_xy2_8_c.
 *
 * Each output byte is (a + b + c + d + 2) >> 2 over the 2x2 source
 * neighbourhood whose top-left sample sits at the output position. Four bytes
 * are handled per 32-bit word: the low two bits and the high six bits of every
 * byte are summed separately so that no carry crosses a byte boundary. The
 * 8-pixel block is processed as two 4-byte wide columns, two rows per loop
 * iteration, and the horizontal sums of a row are reused for the row below.
 * OP(dst_word, value) performs the store.
 */
#define PIXOP2(OPNAME, OP) \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block, \
                                              const uint8_t *pixels, \
                                              ptrdiff_t line_size, \
                                              int h) \
{ \
    /* FIXME HIGH BIT DEPTH */ \
    int half; \
 \
    for (half = 0; half < 2; half++) { \
        int row; \
        uint32_t cur  = AV_RN32(pixels); \
        uint32_t next = AV_RN32(pixels + 1); \
        /* lo_rnd/hi_rnd: row sums that carry the +2 rounding term */ \
        uint32_t lo_rnd = (cur  & 0x03030303UL) + \
                          (next & 0x03030303UL) + \
                          0x02020202UL; \
        uint32_t hi_rnd = ((cur  & 0xFCFCFCFCUL) >> 2) + \
                          ((next & 0xFCFCFCFCUL) >> 2); \
        uint32_t lo_raw, hi_raw; \
 \
        pixels += line_size; \
        for (row = 0; row < h; row += 2) { \
            cur    = AV_RN32(pixels); \
            next   = AV_RN32(pixels + 1); \
            lo_raw = (cur  & 0x03030303UL) + \
                     (next & 0x03030303UL); \
            hi_raw = ((cur  & 0xFCFCFCFCUL) >> 2) + \
                     ((next & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), \
               hi_rnd + hi_raw + (((lo_rnd + lo_raw) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
 \
            cur    = AV_RN32(pixels); \
            next   = AV_RN32(pixels + 1); \
            lo_rnd = (cur  & 0x03030303UL) + \
                     (next & 0x03030303UL) + \
                     0x02020202UL; \
            hi_rnd = ((cur  & 0xFCFCFCFCUL) >> 2) + \
                     ((next & 0xFCFCFCFCUL) >> 2); \
            OP(*((uint32_t *) block), \
               hi_rnd + hi_raw + (((lo_rnd + lo_raw) >> 2) & 0x0F0F0F0FUL)); \
            pixels += line_size; \
            block  += line_size; \
        } \
        /* rewind to the top row and move four bytes to the right */ \
        pixels += 4 - line_size * (h + 1); \
        block  += 4 - line_size * h; \
    } \
} \
 \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c, \
               OPNAME ## _pixels8_xy2_8_c, \
               8)
262 
263 #define op_avg(a, b) a = rnd_avg32(a, b)
264 #define op_put(a, b) a = b
265 PIXOP2(avg, op_avg)
266 PIXOP2(put, op_put)
267 #undef op_avg
268 #undef op_put
269 
/**
 * 16x16 "mc33" position, storing variant: forwards to the 2x2 averaging
 * routine put_pixels16_xy2_8_c with a block height of 16.
 */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride)
{
    const int rows = 16;

    put_pixels16_xy2_8_c(dst, src, stride, rows);
}
/**
 * 16x16 "mc33" position, averaging variant: forwards to the 2x2 averaging
 * routine avg_pixels16_xy2_8_c with a block height of 16.
 */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride)
{
    const int rows = 16;

    avg_pixels16_xy2_8_c(dst, src, stride, rows);
}
/**
 * 8x8 "mc33" position, storing variant: forwards to the 2x2 averaging
 * routine put_pixels8_xy2_8_c with a block height of 8.
 */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
{
    const int rows = 8;

    put_pixels8_xy2_8_c(dst, src, stride, rows);
}
/**
 * 8x8 "mc33" position, averaging variant: forwards to the 2x2 averaging
 * routine avg_pixels8_xy2_8_c with a block height of 8.
 */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride)
{
    const int rows = 8;

    avg_pixels8_xy2_8_c(dst, src, stride, rows);
}
286 
/**
 * Rounding bias added to the weighted chroma sum before the store operation
 * shifts it down. The chroma functions look it up as
 * rv40_bias[y >> 1][x >> 1].
 */
static const int rv40_bias[4][4] = {
    [0] = {  0, 16, 32, 16 },
    [1] = { 32, 28, 32, 28 },
    [2] = {  0, 32, 16, 32 },
    [3] = { 32, 28, 32, 28 },
};
293 
/**
 * Helper for RV40_CHROMA_MC: generate one bilinear chroma motion compensation
 * function for blocks W pixels wide and h rows high.
 *
 * x and y are the fractional offsets (asserted to lie in 0..7). The four
 * neighbouring samples are weighted with (8-x)(8-y), x(8-y), (8-x)y and xy,
 * a bias from rv40_bias is added, and OP(dst, sum) stores the result.
 * When x * y == 0 only two taps contribute, so a shorter loop is used that
 * reads the second sample either one row down or one pixel to the right.
 */
#define RV40_CHROMA_MC_FUNC(OPNAME, OP, W)\
static void OPNAME ## rv40_chroma_mc ## W ## _c(uint8_t *dst /*align 8*/,\
                                                 uint8_t *src /*align 1*/,\
                                                 ptrdiff_t stride, int h, int x, int y)\
{\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
    const int bias = rv40_bias[y >> 1][x >> 1];\
    int row, col;\
\
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if (!w_br) {\
        /* at most one of w_tr / w_bl is non-zero here */\
        const int w_nb = w_tr + w_bl;\
        const ptrdiff_t step = w_bl ? stride : 1;\
        for (row = 0; row < h; row++) {\
            for (col = 0; col < W; col++)\
                OP(dst[col], (w_tl * src[col] + w_nb * src[step + col] + bias));\
            dst += stride;\
            src += stride;\
        }\
        return;\
    }\
\
    for (row = 0; row < h; row++) {\
        for (col = 0; col < W; col++)\
            OP(dst[col], (w_tl * src[col]          + w_tr * src[col + 1] +\
                          w_bl * src[stride + col] + w_br * src[stride + col + 1] + bias));\
        dst += stride;\
        src += stride;\
    }\
}

/**
 * Generate OPNAME rv40_chroma_mc4_c and OPNAME rv40_chroma_mc8_c.
 */
#define RV40_CHROMA_MC(OPNAME, OP)\
RV40_CHROMA_MC_FUNC(OPNAME, OP, 4)\
RV40_CHROMA_MC_FUNC(OPNAME, OP, 8)
374 
375 #define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
376 #define op_put(a, b) a = ((b)>>6)
377 
378 RV40_CHROMA_MC(put_, op_put)
379 RV40_CHROMA_MC(avg_, op_avg)
380 
/**
 * Generate the two weighted bi-prediction functions for a size x size block.
 *
 * Both blend src1 and src2 into dst, applying w2 to src1 and w1 to src2, and
 * finish with "+ 0x10, >> 5". The _rnd_ variant first shifts each product
 * right by 9 bits; the _nornd_ variant adds the raw products. All three
 * buffers are walked with the same stride.
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = ((((unsigned)w2 * src1[i]) >> 9) + (((unsigned)w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = ((unsigned)w2 * src1[i] + (unsigned)w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

/* Instantiate the 16x16 and 8x8 versions installed by ff_rv40dsp_init(). */
RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)

/**
 * dither values for deblocking filter - left/top values
 *
 * The strong loop filter reads entry [dmode + i] for line i (0..3) of an edge.
 */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60,
    0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30,
    0x60, 0x20, 0x50, 0x40,
};
417 
/**
 * dither values for deblocking filter - right/bottom values
 *
 * The strong loop filter reads entry [dmode + i] for line i (0..3) of an edge.
 */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20,
    0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30,
    0x20, 0x60, 0x30, 0x40,
};
425 
426 #define CLIP_SYMM(a, b) av_clip(a, -(b), b)
427 /**
428  * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
429  */
431  const int step,
432  const ptrdiff_t stride,
433  const int filter_p1,
434  const int filter_q1,
435  const int alpha,
436  const int beta,
437  const int lim_p0q0,
438  const int lim_q1,
439  const int lim_p1)
440 {
441  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
442  int i, t, u, diff;
443 
444  for (i = 0; i < 4; i++, src += stride) {
445  int diff_p1p0 = src[-2*step] - src[-1*step];
446  int diff_q1q0 = src[ 1*step] - src[ 0*step];
447  int diff_p1p2 = src[-2*step] - src[-3*step];
448  int diff_q1q2 = src[ 1*step] - src[ 2*step];
449 
450  t = src[0*step] - src[-1*step];
451  if (!t)
452  continue;
453 
454  u = (alpha * FFABS(t)) >> 7;
455  if (u > 3 - (filter_p1 && filter_q1))
456  continue;
457 
458  t *= 1 << 2;
459  if (filter_p1 && filter_q1)
460  t += src[-2*step] - src[1*step];
461 
462  diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
463  src[-1*step] = cm[src[-1*step] + diff];
464  src[ 0*step] = cm[src[ 0*step] - diff];
465 
466  if (filter_p1 && FFABS(diff_p1p2) <= beta) {
467  t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
468  src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
469  }
470 
471  if (filter_q1 && FFABS(diff_q1q2) <= beta) {
472  t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
473  src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
474  }
475  }
476 }
477 
/**
 * Weak loop filter, "h" orientation: samples across the edge are `stride`
 * apart and the four filtered lines are one byte apart.
 */
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src,
                          stride, /* step across the edge */
                          1,      /* advance along the edge */
                          filter_p1, filter_q1,
                          alpha, beta,
                          lim_p0q0, lim_q1, lim_p1);
}
487 
/**
 * Weak loop filter, "v" orientation: samples across the edge are one byte
 * apart and the four filtered lines are `stride` apart.
 */
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src,
                          1,      /* step across the edge */
                          stride, /* advance along the edge */
                          filter_p1, filter_q1,
                          alpha, beta,
                          lim_p0q0, lim_q1, lim_p1);
}
497 
499  const int step,
500  const ptrdiff_t stride,
501  const int alpha,
502  const int lims,
503  const int dmode,
504  const int chroma)
505 {
506  int i;
507 
508  for(i = 0; i < 4; i++, src += stride){
509  int sflag, p0, q0, p1, q1;
510  int t = src[0*step] - src[-1*step];
511 
512  if (!t)
513  continue;
514 
515  sflag = (alpha * FFABS(t)) >> 7;
516  if (sflag > 1)
517  continue;
518 
519  p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
520  26*src[ 0*step] + 25*src[ 1*step] +
521  rv40_dither_l[dmode + i]) >> 7;
522 
523  q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
524  26*src[ 1*step] + 25*src[ 2*step] +
525  rv40_dither_r[dmode + i]) >> 7;
526 
527  if (sflag) {
528  p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
529  q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
530  }
531 
532  p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
533  25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
534  q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
535  25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
536 
537  if (sflag) {
538  p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
539  q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
540  }
541 
542  src[-2*step] = p1;
543  src[-1*step] = p0;
544  src[ 0*step] = q0;
545  src[ 1*step] = q1;
546 
547  if(!chroma){
548  src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
549  51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
550  src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
551  51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
552  }
553  }
554 }
555 
/**
 * Strong loop filter, "h" orientation: samples across the edge are `stride`
 * apart and the four filtered lines are one byte apart.
 */
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src,
                            stride, /* step across the edge */
                            1,      /* advance along the edge */
                            alpha, lims, dmode, chroma);
}
562 
/**
 * Strong loop filter, "v" orientation: samples across the edge are one byte
 * apart and the four filtered lines are `stride` apart.
 */
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src,
                            1,      /* step across the edge */
                            stride, /* advance along the edge */
                            alpha, lims, dmode, chroma);
}
569 
571  int step, ptrdiff_t stride,
572  int beta, int beta2,
573  int edge,
574  int *p1, int *q1)
575 {
576  int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
577  int strong0 = 0, strong1 = 0;
578  uint8_t *ptr;
579  int i;
580 
581  for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
582  sum_p1p0 += ptr[-2*step] - ptr[-1*step];
583  sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
584  }
585 
586  *p1 = FFABS(sum_p1p0) < (beta << 2);
587  *q1 = FFABS(sum_q1q0) < (beta << 2);
588 
589  if(!*p1 && !*q1)
590  return 0;
591 
592  if (!edge)
593  return 0;
594 
595  for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
596  sum_p1p2 += ptr[-2*step] - ptr[-3*step];
597  sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
598  }
599 
600  strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
601  strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
602 
603  return strong0 && strong1;
604 }
605 
/**
 * Edge strength decision, "h" orientation: samples across the edge are
 * `stride` apart and the four inspected lines are one byte apart.
 */
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}
612 
/**
 * Edge strength decision, "v" orientation: samples across the edge are one
 * byte apart and the four inspected lines are `stride` apart.
 */
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
619 
621 {
622  H264QpelContext qpel;
623 
625  ff_h264qpel_init(&qpel, 8);
626 
627  c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
628  c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
629  c->put_pixels_tab[0][ 2] = qpel.put_h264_qpel_pixels_tab[0][2];
630  c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
631  c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
632  c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
633  c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
634  c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
635  c->put_pixels_tab[0][ 8] = qpel.put_h264_qpel_pixels_tab[0][8];
636  c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
637  c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
638  c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
639  c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
640  c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
641  c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
642  c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
643  c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0];
644  c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
645  c->avg_pixels_tab[0][ 2] = qpel.avg_h264_qpel_pixels_tab[0][2];
646  c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
647  c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
648  c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
649  c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
650  c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
651  c->avg_pixels_tab[0][ 8] = qpel.avg_h264_qpel_pixels_tab[0][8];
652  c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
653  c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
654  c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
655  c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
656  c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
657  c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
658  c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
659  c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0];
660  c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
661  c->put_pixels_tab[1][ 2] = qpel.put_h264_qpel_pixels_tab[1][2];
662  c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
663  c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
664  c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
665  c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
666  c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
667  c->put_pixels_tab[1][ 8] = qpel.put_h264_qpel_pixels_tab[1][8];
668  c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
669  c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
670  c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
671  c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
672  c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
673  c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
674  c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
675  c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0];
676  c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
677  c->avg_pixels_tab[1][ 2] = qpel.avg_h264_qpel_pixels_tab[1][2];
678  c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
679  c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
680  c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
681  c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
682  c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
683  c->avg_pixels_tab[1][ 8] = qpel.avg_h264_qpel_pixels_tab[1][8];
684  c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
685  c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
686  c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
687  c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
688  c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
689  c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
690  c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
691 
692  c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
693  c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
694  c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
695  c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
696 
697  c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16;
698  c->rv40_weight_pixels_tab[0][1] = rv40_weight_func_rnd_8;
699  c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16;
700  c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8;
701 
702  c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter;
703  c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter;
704  c->rv40_strong_loop_filter[0] = rv40_h_strong_loop_filter;
705  c->rv40_strong_loop_filter[1] = rv40_v_strong_loop_filter;
706  c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
707  c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
708 
709  if (ARCH_AARCH64)
711  if (ARCH_ARM)
713  if (ARCH_X86)
715 }
stride
int stride
Definition: mace.c:144
q1
static const uint8_t q1[256]
Definition: twofish.c:96
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:252
rv40_h_strong_loop_filter
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:556
put_rv40_qpel8_mc33_c
static void put_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:278
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1511
rv40_strong_loop_filter
static av_always_inline void rv40_strong_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:498
pixels.h
RV40_LOWPASS
#define RV40_LOWPASS(OPNAME, OP)
Definition: rv40dsp.c:37
rv40_h_weak_loop_filter
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
Definition: rv40dsp.c:478
ff_crop_tab
#define ff_crop_tab
Definition: motionpixels_tablegen.c:26
H264QpelContext::avg_h264_qpel_pixels_tab
qpel_mc_func avg_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:29
h264qpel.h
ff_rv40dsp_init
av_cold void ff_rv40dsp_init(RV34DSPContext *c)
Definition: rv40dsp.c:620
ff_h264qpel_init
av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
Definition: h264qpel.c:49
src
#define src
Definition: vp8dsp.c:254
avassert.h
av_cold
#define av_cold
Definition: attributes.h:84
avg_rv40_qpel16_mc33_c
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:274
rv40_v_strong_loop_filter
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:563
intreadwrite.h
rv40_v_weak_loop_filter
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
Definition: rv40dsp.c:488
q0
static const uint8_t q0[256]
Definition: twofish.c:77
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
ff_rv34dsp_init
av_cold void ff_rv34dsp_init(RV34DSPContext *c)
Definition: rv34dsp.c:131
rv40_loop_filter_strength
static av_always_inline int rv40_loop_filter_strength(uint8_t *src, int step, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:570
mathops.h
rv40_dither_r
static const uint8_t rv40_dither_r[16]
dither values for deblocking filter - right/bottom values
Definition: rv40dsp.c:421
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
RV34DSPContext
Definition: rv34dsp.h:57
CLIP_SYMM
#define CLIP_SYMM(a, b)
Definition: rv40dsp.c:426
put_rv40_qpel16_mc33_c
static void put_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:270
RV40_MC
#define RV40_MC(OPNAME, SIZE)
Definition: rv40dsp.c:110
rv34dsp.h
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
op_put
#define op_put(a, b)
Definition: rv40dsp.c:376
rv40_weak_loop_filter
static av_always_inline void rv40_weak_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
Definition: rv40dsp.c:430
RV40_WEIGHT_FUNC
#define RV40_WEIGHT_FUNC(size)
Definition: rv40dsp.c:381
rv40_h_loop_filter_strength
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:606
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:43
uint8_t
uint8_t
Definition: audio_convert.c:194
op_avg
#define op_avg(a, b)
Definition: rv40dsp.c:375
ff_rv40dsp_init_aarch64
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
Definition: rv40dsp_init_aarch64.c:38
H264QpelContext
Definition: h264qpel.h:27
avcodec.h
rv40_bias
static const int rv40_bias[4][4]
Definition: rv40dsp.c:287
rnd_avg.h
rv40_dither_l
static const uint8_t rv40_dither_l[16]
dither values for deblocking filter - left/top values
Definition: rv40dsp.c:413
avg_rv40_qpel8_mc33_c
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:282
PIXOP2
#define PIXOP2(OPNAME, OP)
Definition: rv40dsp.c:210
H264QpelContext::put_h264_qpel_pixels_tab
qpel_mc_func put_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:28
cm
#define cm
Definition: dvbsubdec.c:37
diff
static av_always_inline int diff(const uint32_t a, const uint32_t b)
Definition: vf_palettegen.c:136
rv40_v_loop_filter_strength
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:613
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
RV40_CHROMA_MC
#define RV40_CHROMA_MC(OPNAME, OP)
Definition: rv40dsp.c:294
ff_rv40dsp_init_x86
void ff_rv40dsp_init_x86(RV34DSPContext *c)
Definition: rv40dsp_init.c:215
ff_rv40dsp_init_arm
av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
Definition: rv40dsp_init_arm.c:144
MAX_NEG_CROP
#define MAX_NEG_CROP
Definition: mathops.h:31