00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "libavutil/attributes.h"
00028 #include "libavutil/common.h"
00029 #include "avcodec.h"
00030 #include "dsputil.h"
00031 #include "vp3dsp.h"
00032
/* Rounding term: added before the final >> 4 of the transform
 * (used as IdctAdjustBeforeShift << 16 ahead of a combined >> 20). */
#define IdctAdjustBeforeShift 8

/* cos(k * pi / 16) scaled by 65536 (16.16 fixed point), k = 1..7. */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/*
 * Fixed-point multiply: (a * b) >> 16.
 *
 * The product is formed in unsigned arithmetic and converted back to int
 * before the shift. A plain signed multiply overflows (undefined behaviour)
 * when an intermediate IDCT term is large, e.g. xC4S4 * 84000 > INT_MAX;
 * unsigned arithmetic wraps instead. Results are unchanged for every input
 * whose product fits in an int.
 */
#define M(a, b) ((int)((unsigned)(a) * (b)) >> 16)
00043
00044 static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
00045 {
00046 int16_t *ip = input;
00047
00048 int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
00049 int Ed, Gd, Add, Bdd, Fd, Hd;
00050
00051 int i;
00052
00053
00054 for (i = 0; i < 8; i++) {
00055
00056 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
00057 A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
00058 B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
00059 C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
00060 D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
00061
00062 Ad = M(xC4S4, (A - C));
00063 Bd = M(xC4S4, (B - D));
00064
00065 Cd = A + C;
00066 Dd = B + D;
00067
00068 E = M(xC4S4, (ip[0] + ip[4]));
00069 F = M(xC4S4, (ip[0] - ip[4]));
00070
00071 G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
00072 H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
00073
00074 Ed = E - G;
00075 Gd = E + G;
00076
00077 Add = F + Ad;
00078 Bdd = Bd - H;
00079
00080 Fd = F - Ad;
00081 Hd = Bd + H;
00082
00083
00084 ip[0] = Gd + Cd ;
00085 ip[7] = Gd - Cd ;
00086
00087 ip[1] = Add + Hd;
00088 ip[2] = Add - Hd;
00089
00090 ip[3] = Ed + Dd ;
00091 ip[4] = Ed - Dd ;
00092
00093 ip[5] = Fd + Bdd;
00094 ip[6] = Fd - Bdd;
00095 }
00096
00097 ip += 8;
00098 }
00099
00100 ip = input;
00101
00102 for ( i = 0; i < 8; i++) {
00103
00104 if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
00105 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
00106
00107 A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]);
00108 B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]);
00109 C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]);
00110 D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]);
00111
00112 Ad = M(xC4S4, (A - C));
00113 Bd = M(xC4S4, (B - D));
00114
00115 Cd = A + C;
00116 Dd = B + D;
00117
00118 E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8;
00119 F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8;
00120
00121 if(type==1){
00122 E += 16*128;
00123 F += 16*128;
00124 }
00125
00126 G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]);
00127 H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]);
00128
00129 Ed = E - G;
00130 Gd = E + G;
00131
00132 Add = F + Ad;
00133 Bdd = Bd - H;
00134
00135 Fd = F - Ad;
00136 Hd = Bd + H;
00137
00138
00139 if(type==0){
00140 ip[0*8] = (Gd + Cd ) >> 4;
00141 ip[7*8] = (Gd - Cd ) >> 4;
00142
00143 ip[1*8] = (Add + Hd ) >> 4;
00144 ip[2*8] = (Add - Hd ) >> 4;
00145
00146 ip[3*8] = (Ed + Dd ) >> 4;
00147 ip[4*8] = (Ed - Dd ) >> 4;
00148
00149 ip[5*8] = (Fd + Bdd ) >> 4;
00150 ip[6*8] = (Fd - Bdd ) >> 4;
00151 }else if(type==1){
00152 dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
00153 dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
00154
00155 dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
00156 dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
00157
00158 dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
00159 dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
00160
00161 dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
00162 dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
00163 }else{
00164 dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
00165 dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
00166
00167 dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
00168 dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
00169
00170 dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
00171 dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
00172
00173 dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
00174 dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
00175 }
00176
00177 } else {
00178 if(type==0){
00179 ip[0*8] =
00180 ip[1*8] =
00181 ip[2*8] =
00182 ip[3*8] =
00183 ip[4*8] =
00184 ip[5*8] =
00185 ip[6*8] =
00186 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00187 }else if(type==1){
00188 dst[0*stride]=
00189 dst[1*stride]=
00190 dst[2*stride]=
00191 dst[3*stride]=
00192 dst[4*stride]=
00193 dst[5*stride]=
00194 dst[6*stride]=
00195 dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
00196 }else{
00197 if(ip[0*8]){
00198 int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00199 dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
00200 dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
00201 dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
00202 dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
00203 dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
00204 dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
00205 dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
00206 dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
00207 }
00208 }
00209 }
00210
00211 ip++;
00212 dst++;
00213 }
00214 }
00215
00216 static void vp3_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block){
00217 idct(dest, line_size, block, 1);
00218 }
00219
00220 static void vp3_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block){
00221 idct(dest, line_size, block, 2);
00222 }
00223
00224 static void vp3_idct_dc_add_c(uint8_t *dest, int line_size,
00225 const DCTELEM *block){
00226 int i, dc = (block[0] + 15) >> 5;
00227
00228 for(i = 0; i < 8; i++){
00229 dest[0] = av_clip_uint8(dest[0] + dc);
00230 dest[1] = av_clip_uint8(dest[1] + dc);
00231 dest[2] = av_clip_uint8(dest[2] + dc);
00232 dest[3] = av_clip_uint8(dest[3] + dc);
00233 dest[4] = av_clip_uint8(dest[4] + dc);
00234 dest[5] = av_clip_uint8(dest[5] + dc);
00235 dest[6] = av_clip_uint8(dest[6] + dc);
00236 dest[7] = av_clip_uint8(dest[7] + dc);
00237 dest += line_size;
00238 }
00239 }
00240
/**
 * Loop filter across 8 horizontally adjacent pixel positions.
 *
 * For each of the 8 pixels starting at @p first_pixel, the pixels two rows
 * above, one row above, at the position and one row below are read; the
 * pixel one row above and the pixel itself are adjusted in opposite
 * directions by a value looked up in @p bounding_values.
 *
 * @param first_pixel     leftmost pixel of the lower of the two modified rows
 * @param stride          byte offset between rows
 * @param bounding_values lookup table indexed by (filter + 4) >> 3; with 8-bit
 *                        pixels the index spans -127..128, so the pointer must
 *                        allow negative indices
 */
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    const int up = -stride;
    int x;

    for (x = 0; x < 8; x++) {
        uint8_t *px = first_pixel + x;
        const int raw = (px[2 * up] - px[stride]) + 3 * (px[0] - px[up]);
        const int adj = bounding_values[(raw + 4) >> 3];

        px[up] = av_clip_uint8(px[up] + adj);
        px[0]  = av_clip_uint8(px[0] - adj);
    }
}
00257
/**
 * Loop filter across 8 vertically adjacent pixel positions.
 *
 * For each of the 8 rows starting at @p first_pixel, the pixels at column
 * offsets -2, -1, 0 and +1 are read; the pixels at -1 and 0 are adjusted in
 * opposite directions by a value looked up in @p bounding_values.
 *
 * @param first_pixel     topmost pixel of the right-hand modified column
 * @param stride          byte offset between rows
 * @param bounding_values lookup table indexed by (filter + 4) >> 3; with 8-bit
 *                        pixels the index spans -127..128, so the pointer must
 *                        allow negative indices
 */
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    uint8_t *const stop = first_pixel + 8 * stride;
    uint8_t *px = first_pixel;

    while (px != stop) {
        const int raw = (px[-2] - px[1]) + 3 * (px[0] - px[-1]);
        const int adj = bounding_values[(raw + 4) >> 3];

        px[-1] = av_clip_uint8(px[-1] + adj);
        px[0]  = av_clip_uint8(px[0] - adj);

        px += stride;
    }
}
00273
00274 av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
00275 {
00276 c->idct_put = vp3_idct_put_c;
00277 c->idct_add = vp3_idct_add_c;
00278 c->idct_dc_add = vp3_idct_dc_add_c;
00279 c->v_loop_filter = vp3_v_loop_filter_c;
00280 c->h_loop_filter = vp3_h_loop_filter_c;
00281
00282 c->idct_perm = FF_NO_IDCT_PERM;
00283
00284 if (ARCH_ARM)
00285 ff_vp3dsp_init_arm(c, flags);
00286 if (ARCH_PPC)
00287 ff_vp3dsp_init_ppc(c, flags);
00288 if (ARCH_X86)
00289 ff_vp3dsp_init_x86(c, flags);
00290 }