#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
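/* Vertical counterpart below: the same 6-tap kernel is applied down each */\
/* column; one loop iteration filters an entire 8-row column, so dst/src  */\
/* advance by a single pixel per iteration instead of by a stride.        */\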
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
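/* The 16-pixel-wide variants tile four 8x8 calls of the functions above. */\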
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}
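/*
 * Quarter-pel motion compensation.  The _mcXY suffix encodes the fractional
 * position: X is the horizontal and Y the vertical quarter-pel offset.  The
 * 1/4 and 3/4 positions use the asymmetric coefficient pairs (52, 20) and
 * (20, 52) with SHIFT = 6 (the taps 1, -5, 52, 20, -5, 1 sum to 64); the
 * half-pel position uses (20, 20) with SHIFT = 5 (taps sum to 32).
 * Two-dimensional positions filter horizontally into the on-stack `full`
 * buffer first, then vertically from `full_mid`.
 */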
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}
#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
#define op_put(a, b) a = cm[b]

RV40_LOWPASS(put_, op_put)
RV40_LOWPASS(avg_, op_avg)

#undef op_avg
#undef op_put

RV40_MC(put_, 8)
RV40_MC(put_, 16)
RV40_MC(avg_, 8)
RV40_MC(avg_, 16)

/* Rounding bias for the chroma MC below, indexed as rv40_bias[y>>1][x>>1]. */
static const int rv40_bias[4][4] = {
    {  0, 16, 32, 16 },
    { 32, 28, 32, 28 },
    {  0, 32, 16, 32 },
    { 32, 28, 32, 28 }
};
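/*
 * Chroma motion compensation: bilinear interpolation with eighth-pel
 * precision.  A, B, C and D are the usual bilinear weights (A+B+C+D == 64),
 * and `bias` replaces the plain rounding constant with an RV40-specific
 * value that depends on the subpel position; OP() shifts the weighted sum
 * right by 6.
 */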
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
#define op_put(a, b) a = ((b)>>6)

RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg)
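/*
 * Weighted B-frame prediction: dst = (w2*src1 + w1*src2 + 16) >> 5.  The
 * _rnd variant shifts each product right by 9 before summing, rounding
 * earlier at the cost of precision; _nornd keeps full precision until the
 * final shift.
 */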
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)
/** dither values for deblocking filter - left/top values */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/** dither values for deblocking filter - right/bottom values */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};
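/*
 * In-loop deblocking filter.  The two tables above dither the rounding of
 * the strong filter; `dmode` selects the starting offset and the current
 * row/column index i is added to it.
 */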
#define CLIP_SYMM(a, b) av_clip(a, -(b), b)
/**
 * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
 */
static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
                                                   const int step,
                                                   const ptrdiff_t stride,
                                                   const int filter_p1,
                                                   const int filter_q1,
                                                   const int alpha,
                                                   const int beta,
                                                   const int lim_p0q0,
                                                   const int lim_q1,
                                                   const int lim_p1)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i, t, u, diff;

    for (i = 0; i < 4; i++, src += stride) {
        int diff_p1p0 = src[-2*step] - src[-1*step];
        int diff_q1q0 = src[ 1*step] - src[ 0*step];
        int diff_p1p2 = src[-2*step] - src[-3*step];
        int diff_q1q2 = src[ 1*step] - src[ 2*step];

        t = src[0*step] - src[-1*step];
        if (!t)
            continue;

        u = (alpha * FFABS(t)) >> 7;
        if (u > 3 - (filter_p1 && filter_q1))
            continue;

        t <<= 2;
        if (filter_p1 && filter_q1)
            t += src[-2*step] - src[1*step];

        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
        src[-1*step] = cm[src[-1*step] + diff];
        src[ 0*step] = cm[src[ 0*step] - diff];

        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
        }

        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
        }
    }
}
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
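/*
 * Stronger deblocking for very flat edges: p0/q0 and p1/q1 are replaced by
 * dithered 7-bit smoothed values, clipped to +/-lims around the original
 * pixels when sflag is set; for luma (chroma == 0) the outermost pair
 * p2/q2 is smoothed as well.
 */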
static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
                                                     const int step,
                                                     const ptrdiff_t stride,
                                                     const int alpha,
                                                     const int lims,
                                                     const int dmode,
                                                     const int chroma)
{
    int i;

    for(i = 0; i < 4; i++, src += stride){
        int sflag, p0, q0, p1, q1;
        int t = src[0*step] - src[-1*step];

        if (!t)
            continue;

        sflag = (alpha * FFABS(t)) >> 7;
        if (sflag > 1)
            continue;

        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
              26*src[ 0*step] + 25*src[ 1*step] +
              rv40_dither_l[dmode + i]) >> 7;
        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
              26*src[ 1*step] + 25*src[ 2*step] +
              rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
        }

        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
        }

        src[-2*step] = p1;
        src[-1*step] = p0;
        src[ 0*step] = q0;
        src[ 1*step] = q1;

        if (!chroma) {
            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
        }
    }
}
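/*
 * As with the weak filter, the horizontal-edge wrappers filter across rows
 * (step == stride) while stepping along the edge one pixel at a time, and
 * the vertical-edge wrappers swap the two.
 */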
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}

static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}
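/*
 * Edge-strength decision: *p1 and *q1 report whether the second pixel rows
 * on each side are smooth enough to be touched by the weak filter; the
 * return value selects the strong filter, which is only possible on
 * macroblock edges (edge != 0).
 */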
static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
                                                      int step, ptrdiff_t stride,
                                                      int beta, int beta2,
                                                      int edge,
                                                      int *p1, int *q1)
{
    int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
    int strong0 = 0, strong1 = 0;
    uint8_t *ptr;
    int i;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
    }

    *p1 = FFABS(sum_p1p0) < (beta << 2);
    *q1 = FFABS(sum_q1q0) < (beta << 2);

    if (!*p1 && !*q1)
        return 0;

    if (!edge)
        return 0;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
    }

    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);

    return strong0 && strong1;
}
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}

static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
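/*
 * For reference, a minimal sketch of how the loop-filter functions are
 * wired up by this file's init routine (assuming the RV34DSPContext layout
 * from rv34dsp.h; the real ff_rv40dsp_init() also fills the qpel, chroma
 * and weight tables, which is omitted here):
 *
 *     c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
 *     c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
 *     c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
 *     c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
 *     c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
 *     c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
 */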