00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093 #include "config.h"
00094 #include "rgb2rgb.h"
00095 #include "swscale.h"
00096 #include "swscale_internal.h"
00097
00098 #undef PROFILE_THE_BEAST
00099 #undef INC_SCALING
00100
/* Short aliases for the 8-bit sample types used by the converters below. */
typedef signed char   sbyte;
typedef unsigned char ubyte;
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 static
00142 const vector unsigned char
00143 perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00144 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
00145 perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00146 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
00147 perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00148 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
00149 perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00150 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
00151
/*
 * vec_merge3(x2,x1,x0, y0,y1,y2)
 *
 * Interleave three 16-byte planes into 48 packed bytes, returned in
 * y0,y1,y2 (in that memory order).  Each output triplet is laid out as
 * x0[n], x1[n], x2[n] -- note that the plane stored FIRST in memory is the
 * THIRD macro argument.
 *
 * The inputs are expanded more than once, so pass plain variables only.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                              \
do {                                                               \
    __typeof__(x0) m3_hi_pairs, m3_carry, m3_lo_pairs;             \
    /* x0/x1 byte pairs for pixels 0..7 */                         \
    m3_hi_pairs = vec_mergeh (x0,x1);                              \
    y0          = vec_perm (m3_hi_pairs, x2, perm_rgb_0);          \
    /* leftovers of pixels 5..7 followed by x2[8..15] */           \
    m3_carry    = vec_perm (m3_hi_pairs, x2, perm_rgb_1);          \
    /* x0/x1 byte pairs for pixels 8..15 */                        \
    m3_lo_pairs = vec_mergel (x0,x1);                              \
    y1          = vec_perm (m3_lo_pairs, m3_carry, perm_rgb_2);    \
    y2          = vec_perm (m3_lo_pairs, m3_carry, perm_rgb_3);    \
} while(0)
00162
/*
 * vec_mstbgr24(x0,x1,x2,ptr)
 *
 * Interleave 16 pixels from the three byte planes x0,x1,x2 and store the
 * resulting 48 bytes through ptr (a vector pointer), advancing ptr by three
 * vectors.  Within each stored pixel the byte order is x2, x1, x0.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement (safe inside un-braced if/else); callers supply
 * the terminating ';'.
 */
#define vec_mstbgr24(x0,x1,x2,ptr)      \
do {                                    \
    __typeof__(x0) _0,_1,_2;            \
    vec_merge3 (x0,x1,x2,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
00171
/*
 * vec_mstrgb24(x0,x1,x2,ptr)
 *
 * Interleave 16 pixels from the three byte planes x0,x1,x2 and store the
 * resulting 48 bytes through ptr (a vector pointer), advancing ptr by three
 * vectors.  Within each stored pixel the byte order is x0, x1, x2.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement (safe inside un-braced if/else); callers supply
 * the terminating ';'.
 */
#define vec_mstrgb24(x0,x1,x2,ptr)      \
do {                                    \
    __typeof__(x0) _0,_1,_2;            \
    vec_merge3 (x2,x1,x0,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
00180
00181
00182
00183
00184
/*
 * vec_mstrgb32(T,x0,x1,x2,x3,ptr)
 *
 * Interleave 16 pixels from the four byte planes x0..x3 (each of vector
 * type T) into 4-byte pixels laid out as x0,x1,x2,x3 and store the 64
 * resulting bytes at ptr, then advance ptr by four vectors.
 *
 * Byte pairs (x0,x1) and (x2,x3) are merged first, then merged again as
 * 16-bit units to form complete pixels.
 *
 * The macro deliberately does NOT end in a semicolon so that it behaves
 * like a single statement; callers supply the terminating ';'.  The plane
 * arguments are expanded more than once, so pass side-effect-free
 * expressions.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                       \
do {                                                                          \
    T _0,_1,_2,_3;                                                            \
    /* pixels 0..7 */                                                         \
    _0 = vec_mergeh (x0,x1);                                                  \
    _1 = vec_mergeh (x2,x3);                                                  \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 0*16, (T *)(ptr));                                            \
    vec_st (_3, 1*16, (T *)(ptr));                                            \
    /* pixels 8..15 */                                                        \
    _0 = vec_mergel (x0,x1);                                                  \
    _1 = vec_mergel (x2,x3);                                                  \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 2*16, (T *)(ptr));                                            \
    vec_st (_3, 3*16, (T *)(ptr));                                            \
    (ptr) += 4;                                                               \
} while (0)
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
/*
 * vec_unh(x) / vec_unl(x)
 *
 * Zero-extend the high (bytes 0..7) or low (bytes 8..15) half of the
 * 16-byte vector x to eight 16-bit lanes.  Control index 0x10 selects byte 0
 * of the all-zero second vec_perm operand, which becomes the upper byte of
 * every 16-bit lane.
 *
 * The whole expansion is parenthesized so the macros can be used inside
 * larger expressions.
 */
#define vec_unh(x) \
    ((vector signed short) \
     vec_perm((x),(__typeof__(x)){0}, \
              ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
                                      0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07})))
#define vec_unl(x) \
    ((vector signed short) \
     vec_perm((x),(__typeof__(x)){0}, \
              ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
                                      0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F})))
00230
/* Clamp every signed 16-bit lane of x to the range [16, 235]. */
#define vec_clip_s16(x)                                                       \
    vec_max (vec_min (x,                                                      \
                      ((vector signed short){235,235,235,235,235,235,235,235})), \
             ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16}))
00234
/*
 * vec_packclp(x,y)
 *
 * Pack two vectors of signed 16-bit lanes into one vector of 16 unsigned
 * bytes.  Negative lanes are first raised to 0 with vec_max; the lanes are
 * then narrowed with vec_packs on unsigned 16-bit operands (saturating
 * pack), so x supplies bytes 0..7 and y supplies bytes 8..15.
 *
 * The whole expansion is parenthesized so the macro can be used inside
 * larger expressions.
 */
#define vec_packclp(x,y) \
    ((vector unsigned char)vec_packs \
        ((vector unsigned short)vec_max ((x),((vector signed short) {0})), \
         (vector unsigned short)vec_max ((y),((vector signed short) {0}))))
00239
00240
00241
00242
00243 static inline void cvtyuvtoRGB (SwsContext *c,
00244 vector signed short Y, vector signed short U, vector signed short V,
00245 vector signed short *R, vector signed short *G, vector signed short *B)
00246 {
00247 vector signed short vx,ux,uvx;
00248
00249 Y = vec_mradds (Y, c->CY, c->OY);
00250 U = vec_sub (U,(vector signed short)
00251 vec_splat((vector signed short){128},0));
00252 V = vec_sub (V,(vector signed short)
00253 vec_splat((vector signed short){128},0));
00254
00255
00256 ux = vec_sl (U, c->CSHIFT);
00257 *B = vec_mradds (ux, c->CBU, Y);
00258
00259
00260 vx = vec_sl (V, c->CSHIFT);
00261 *R = vec_mradds (vx, c->CRV, Y);
00262
00263
00264 uvx = vec_mradds (U, c->CGU, Y);
00265 *G = vec_mradds (V, c->CGV, uvx);
00266 }
00267
00268
00269
00270
00271
00272
00273
00274
00275
/*
 * DEFCSP420_CVT(name, out_pixels)
 *
 * Defines `static int altivec_<name>(...)`, a converter from planar YUV
 * with 2x2 chroma subsampling to a packed RGB layout.
 *
 *   in[0..2] / instrides[0..2] - Y, U and V planes and their byte strides
 *   srcSliceY, srcSliceH       - first row and row count of the slice
 *   oplanes[0] / outstrides[0] - packed destination and its byte stride
 *   out_pixels(R,G,B,ptr)      - macro that stores 16 pixels through the
 *                                vector pointer ptr and advances it
 *
 * Two luma rows are processed per outer iteration (they share one chroma
 * row) and 16 pixels per inner iteration, so only srcSliceH/2 row pairs and
 * srcW/16 column groups are converted.  Source rows may be unaligned
 * (vec_lvsl/vec_perm is used); destination stores use vec_st and therefore
 * assume 16-byte aligned rows.  Returns srcSliceH.
 *
 * The destination row pointers are recomputed from outstrides[0] for every
 * row pair, so destinations whose stride is larger than the packed row
 * width are handled correctly.
 */
#define DEFCSP420_CVT(name,out_pixels)                                     \
static int altivec_##name (SwsContext *c,                                  \
                           unsigned char **in, int *instrides,             \
                           int srcSliceY, int srcSliceH,                   \
                           unsigned char **oplanes, int *outstrides)       \
{                                                                          \
    int w = c->srcW;                                                       \
    int h = srcSliceH;                                                     \
    int i,j;                                                               \
    int instrides_scl[3];                                                  \
    vector unsigned char y0,y1;                                            \
                                                                           \
    vector signed char  u,v;                                               \
                                                                           \
    vector signed short Y0,Y1,Y2,Y3;                                       \
    vector signed short U,V;                                               \
    vector signed short vx,ux,uvx;                                         \
    vector signed short vx0,ux0,uvx0;                                      \
    vector signed short vx1,ux1,uvx1;                                      \
    vector signed short R0,G0,B0;                                          \
    vector signed short R1,G1,B1;                                          \
    vector unsigned char R,G,B;                                            \
                                                                           \
    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                     \
    vector unsigned char align_perm;                                       \
                                                                           \
    /* local copies of the conversion coefficients */                      \
    vector signed short                                                    \
        lCY       = c->CY,                                                 \
        lOY       = c->OY,                                                 \
        lCRV      = c->CRV,                                                \
        lCBU      = c->CBU,                                                \
        lCGU      = c->CGU,                                                \
        lCGV      = c->CGV;                                                \
                                                                           \
    vector unsigned short lCSHIFT = c->CSHIFT;                             \
                                                                           \
    ubyte *y1i   = in[0];                 /* even luma row */              \
    ubyte *y2i   = in[0]+instrides[0];    /* odd luma row  */              \
    ubyte *ui    = in[1];                                                  \
    ubyte *vi    = in[2];                                                  \
                                                                           \
    /* first destination row of this slice */                              \
    ubyte *out_base = oplanes[0]+srcSliceY*outstrides[0];                  \
    vector unsigned char *oute;           /* even output row */            \
    vector unsigned char *outo;           /* odd output row  */            \
                                                                           \
    /* bytes to skip after the inner loop has consumed w (or w/2) bytes */ \
    instrides_scl[0] = instrides[0]*2-w;  /* jump over the other row */    \
    instrides_scl[1] = instrides[1]-w/2;                                   \
    instrides_scl[2] = instrides[2]-w/2;                                   \
                                                                           \
    for (i=0;i<h/2;i++) {                                                  \
        /* derive both row pointers from the stride, not from how far */   \
        /* the previous row pair happened to advance them             */   \
        oute = (vector unsigned char *)                                    \
                   (out_base+(2*i)*outstrides[0]);                         \
        outo = (vector unsigned char *)                                    \
                   (out_base+(2*i+1)*outstrides[0]);                       \
                                                                           \
        /* data-stream prefetch hints for the two output rows */           \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);             \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);             \
                                                                           \
        for (j=0;j<w/16;j++) {                                             \
                                                                           \
            y1ivP = (vector unsigned char *)y1i;                           \
            y2ivP = (vector unsigned char *)y2i;                           \
            uivP  = (vector unsigned char *)ui;                            \
            vivP  = (vector unsigned char *)vi;                            \
                                                                           \
            /* unaligned loads: combine two aligned vectors */             \
            align_perm = vec_lvsl (0, y1i);                                \
            y0 = (vector unsigned char)                                    \
                     vec_perm (y1ivP[0], y1ivP[1], align_perm);            \
                                                                           \
            align_perm = vec_lvsl (0, y2i);                                \
            y1 = (vector unsigned char)                                    \
                     vec_perm (y2ivP[0], y2ivP[1], align_perm);            \
                                                                           \
            align_perm = vec_lvsl (0, ui);                                 \
            u = (vector signed char)                                       \
                    vec_perm (uivP[0], uivP[1], align_perm);               \
                                                                           \
            align_perm = vec_lvsl (0, vi);                                 \
            v = (vector signed char)                                       \
                    vec_perm (vivP[0], vivP[1], align_perm);               \
                                                                           \
            /* re-centre chroma around zero (byte arithmetic wraps) */     \
            u = (vector signed char)                                       \
                    vec_sub (u,(vector signed char)                        \
                             vec_splat((vector signed char){128},0));      \
            v = (vector signed char)                                       \
                    vec_sub (v,(vector signed char)                        \
                             vec_splat((vector signed char){128},0));      \
                                                                           \
            /* only the first 8 chroma samples pair with 16 luma */        \
            U  = vec_unpackh (u);                                          \
            V  = vec_unpackh (v);                                          \
                                                                           \
            /* widen luma to 16 bit */                                     \
            Y0 = vec_unh (y0);                                             \
            Y1 = vec_unl (y0);                                             \
            Y2 = vec_unh (y1);                                             \
            Y3 = vec_unl (y1);                                             \
                                                                           \
            Y0 = vec_mradds (Y0, lCY, lOY);                                \
            Y1 = vec_mradds (Y1, lCY, lOY);                                \
            Y2 = vec_mradds (Y2, lCY, lOY);                                \
            Y3 = vec_mradds (Y3, lCY, lOY);                                \
                                                                           \
            /* blue term; each chroma value is duplicated for 2 pixels */  \
            ux = vec_sl (U, lCSHIFT);                                      \
            ux = vec_mradds (ux, lCBU, (vector signed short){0});          \
            ux0  = vec_mergeh (ux,ux);                                     \
            ux1  = vec_mergel (ux,ux);                                     \
                                                                           \
            /* red term */                                                 \
            vx = vec_sl (V, lCSHIFT);                                      \
            vx = vec_mradds (vx, lCRV, (vector signed short){0});          \
            vx0  = vec_mergeh (vx,vx);                                     \
            vx1  = vec_mergel (vx,vx);                                     \
                                                                           \
            /* green term */                                               \
            uvx = vec_mradds (U, lCGU, (vector signed short){0});          \
            uvx = vec_mradds (V, lCGV, uvx);                               \
            uvx0 = vec_mergeh (uvx,uvx);                                   \
            uvx1 = vec_mergel (uvx,uvx);                                   \
                                                                           \
            /* even row */                                                 \
            R0 = vec_add (Y0,vx0);                                         \
            G0 = vec_add (Y0,uvx0);                                        \
            B0 = vec_add (Y0,ux0);                                         \
            R1 = vec_add (Y1,vx1);                                         \
            G1 = vec_add (Y1,uvx1);                                        \
            B1 = vec_add (Y1,ux1);                                         \
                                                                           \
            R  = vec_packclp (R0,R1);                                      \
            G  = vec_packclp (G0,G1);                                      \
            B  = vec_packclp (B0,B1);                                      \
                                                                           \
            out_pixels(R,G,B,oute);                                        \
                                                                           \
            /* odd row reuses the same chroma terms */                     \
            R0 = vec_add (Y2,vx0);                                         \
            G0 = vec_add (Y2,uvx0);                                        \
            B0 = vec_add (Y2,ux0);                                         \
            R1 = vec_add (Y3,vx1);                                         \
            G1 = vec_add (Y3,uvx1);                                        \
            B1 = vec_add (Y3,ux1);                                         \
            R  = vec_packclp (R0,R1);                                      \
            G  = vec_packclp (G0,G1);                                      \
            B  = vec_packclp (B0,B1);                                      \
                                                                           \
            out_pixels(R,G,B,outo);                                        \
                                                                           \
            y1i  += 16;                                                    \
            y2i  += 16;                                                    \
            ui   += 8;                                                     \
            vi   += 8;                                                     \
                                                                           \
        }                                                                  \
                                                                           \
        ui    += instrides_scl[1];                                         \
        vi    += instrides_scl[2];                                         \
        y1i   += instrides_scl[0];                                         \
        y2i   += instrides_scl[0];                                         \
    }                                                                      \
    return srcSliceH;                                                      \
}
00439
00440
/*
 * Per-layout pixel writers: out_<fmt>(a,b,c,ptr) stores 16 pixels whose
 * red, green and blue planes are a, b and c through the vector pointer ptr
 * and advances ptr.  The suffix gives the byte order in memory.
 *
 * The alpha plane must be 255 in ALL sixteen lanes.  A brace literal
 * `{255}` only initializes lane 0 (the remaining lanes are zero), so the
 * constant is broadcast with vec_splat -- the same idiom this file uses
 * for the 128 chroma bias.
 */
#define vec_opaque_alpha(v) ((__typeof__ (v))vec_splat((__typeof__ (v)){255},0))

#define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),vec_opaque_alpha(a),c,b,a,ptr)
#define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,vec_opaque_alpha(a),ptr)
#define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,vec_opaque_alpha(a),ptr)
#define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),vec_opaque_alpha(a),a,b,c,ptr)
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00447
00448 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00449 #if 1
00450 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00451 #else
00452 static int altivec_yuv2_bgra32 (SwsContext *c,
00453 unsigned char **in, int *instrides,
00454 int srcSliceY, int srcSliceH,
00455 unsigned char **oplanes, int *outstrides)
00456 {
00457 int w = c->srcW;
00458 int h = srcSliceH;
00459 int i,j;
00460 int instrides_scl[3];
00461 vector unsigned char y0,y1;
00462
00463 vector signed char u,v;
00464
00465 vector signed short Y0,Y1,Y2,Y3;
00466 vector signed short U,V;
00467 vector signed short vx,ux,uvx;
00468 vector signed short vx0,ux0,uvx0;
00469 vector signed short vx1,ux1,uvx1;
00470 vector signed short R0,G0,B0;
00471 vector signed short R1,G1,B1;
00472 vector unsigned char R,G,B;
00473
00474 vector unsigned char *uivP, *vivP;
00475 vector unsigned char align_perm;
00476
00477 vector signed short
00478 lCY = c->CY,
00479 lOY = c->OY,
00480 lCRV = c->CRV,
00481 lCBU = c->CBU,
00482 lCGU = c->CGU,
00483 lCGV = c->CGV;
00484
00485 vector unsigned short lCSHIFT = c->CSHIFT;
00486
00487 ubyte *y1i = in[0];
00488 ubyte *y2i = in[0]+w;
00489 ubyte *ui = in[1];
00490 ubyte *vi = in[2];
00491
00492 vector unsigned char *oute
00493 = (vector unsigned char *)
00494 (oplanes[0]+srcSliceY*outstrides[0]);
00495 vector unsigned char *outo
00496 = (vector unsigned char *)
00497 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
00498
00499
00500 instrides_scl[0] = instrides[0];
00501 instrides_scl[1] = instrides[1]-w/2;
00502 instrides_scl[2] = instrides[2]-w/2;
00503
00504
00505 for (i=0;i<h/2;i++) {
00506 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
00507 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
00508
00509 for (j=0;j<w/16;j++) {
00510
00511 y0 = vec_ldl (0,y1i);
00512 y1 = vec_ldl (0,y2i);
00513 uivP = (vector unsigned char *)ui;
00514 vivP = (vector unsigned char *)vi;
00515
00516 align_perm = vec_lvsl (0, ui);
00517 u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
00518
00519 align_perm = vec_lvsl (0, vi);
00520 v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
00521 u = (vector signed char)
00522 vec_sub (u,(vector signed char)
00523 vec_splat((vector signed char){128},0));
00524
00525 v = (vector signed char)
00526 vec_sub (v, (vector signed char)
00527 vec_splat((vector signed char){128},0));
00528
00529 U = vec_unpackh (u);
00530 V = vec_unpackh (v);
00531
00532
00533 Y0 = vec_unh (y0);
00534 Y1 = vec_unl (y0);
00535 Y2 = vec_unh (y1);
00536 Y3 = vec_unl (y1);
00537
00538 Y0 = vec_mradds (Y0, lCY, lOY);
00539 Y1 = vec_mradds (Y1, lCY, lOY);
00540 Y2 = vec_mradds (Y2, lCY, lOY);
00541 Y3 = vec_mradds (Y3, lCY, lOY);
00542
00543
00544 ux = vec_sl (U, lCSHIFT);
00545 ux = vec_mradds (ux, lCBU, (vector signed short){0});
00546 ux0 = vec_mergeh (ux,ux);
00547 ux1 = vec_mergel (ux,ux);
00548
00549
00550 vx = vec_sl (V, lCSHIFT);
00551 vx = vec_mradds (vx, lCRV, (vector signed short){0});
00552 vx0 = vec_mergeh (vx,vx);
00553 vx1 = vec_mergel (vx,vx);
00554
00555 uvx = vec_mradds (U, lCGU, (vector signed short){0});
00556 uvx = vec_mradds (V, lCGV, uvx);
00557 uvx0 = vec_mergeh (uvx,uvx);
00558 uvx1 = vec_mergel (uvx,uvx);
00559 R0 = vec_add (Y0,vx0);
00560 G0 = vec_add (Y0,uvx0);
00561 B0 = vec_add (Y0,ux0);
00562 R1 = vec_add (Y1,vx1);
00563 G1 = vec_add (Y1,uvx1);
00564 B1 = vec_add (Y1,ux1);
00565 R = vec_packclp (R0,R1);
00566 G = vec_packclp (G0,G1);
00567 B = vec_packclp (B0,B1);
00568
00569 out_argb(R,G,B,oute);
00570 R0 = vec_add (Y2,vx0);
00571 G0 = vec_add (Y2,uvx0);
00572 B0 = vec_add (Y2,ux0);
00573 R1 = vec_add (Y3,vx1);
00574 G1 = vec_add (Y3,uvx1);
00575 B1 = vec_add (Y3,ux1);
00576 R = vec_packclp (R0,R1);
00577 G = vec_packclp (G0,G1);
00578 B = vec_packclp (B0,B1);
00579
00580 out_argb(R,G,B,outo);
00581 y1i += 16;
00582 y2i += 16;
00583 ui += 8;
00584 vi += 8;
00585
00586 }
00587
00588 outo += (outstrides[0])>>4;
00589 oute += (outstrides[0])>>4;
00590
00591 ui += instrides_scl[1];
00592 vi += instrides_scl[2];
00593 y1i += instrides_scl[0];
00594 y2i += instrides_scl[0];
00595 }
00596 return srcSliceH;
00597 }
00598
00599 #endif
00600
00601
00602 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00603 DEFCSP420_CVT (yuv2_argb, out_argb)
00604 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00605 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00606
00607
00608
00609
00610 static
00611 const vector unsigned char
00612 demux_u = {0x10,0x00,0x10,0x00,
00613 0x10,0x04,0x10,0x04,
00614 0x10,0x08,0x10,0x08,
00615 0x10,0x0c,0x10,0x0c},
00616 demux_v = {0x10,0x02,0x10,0x02,
00617 0x10,0x06,0x10,0x06,
00618 0x10,0x0A,0x10,0x0A,
00619 0x10,0x0E,0x10,0x0E},
00620 demux_y = {0x10,0x01,0x10,0x03,
00621 0x10,0x05,0x10,0x07,
00622 0x10,0x09,0x10,0x0B,
00623 0x10,0x0D,0x10,0x0F};
00624
00625
00626
00627
00628 static int altivec_uyvy_rgb32 (SwsContext *c,
00629 unsigned char **in, int *instrides,
00630 int srcSliceY, int srcSliceH,
00631 unsigned char **oplanes, int *outstrides)
00632 {
00633 int w = c->srcW;
00634 int h = srcSliceH;
00635 int i,j;
00636 vector unsigned char uyvy;
00637 vector signed short Y,U,V;
00638 vector signed short R0,G0,B0,R1,G1,B1;
00639 vector unsigned char R,G,B;
00640 vector unsigned char *out;
00641 ubyte *img;
00642
00643 img = in[0];
00644 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00645
00646 for (i=0;i<h;i++) {
00647 for (j=0;j<w/16;j++) {
00648 uyvy = vec_ld (0, img);
00649 U = (vector signed short)
00650 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00651
00652 V = (vector signed short)
00653 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00654
00655 Y = (vector signed short)
00656 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00657
00658 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00659
00660 uyvy = vec_ld (16, img);
00661 U = (vector signed short)
00662 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00663
00664 V = (vector signed short)
00665 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00666
00667 Y = (vector signed short)
00668 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00669
00670 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00671
00672 R = vec_packclp (R0,R1);
00673 G = vec_packclp (G0,G1);
00674 B = vec_packclp (B0,B1);
00675
00676
00677 out_rgba (R,G,B,out);
00678
00679 img += 32;
00680 }
00681 }
00682 return srcSliceH;
00683 }
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693 SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c)
00694 {
00695 if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
00696 return NULL;
00697
00698
00699
00700
00701
00702
00703
00704
00705 if ((c->srcW & 0xf) != 0) return NULL;
00706
00707 switch (c->srcFormat) {
00708 case PIX_FMT_YUV410P:
00709 case PIX_FMT_YUV420P:
00710
00711 case PIX_FMT_GRAY8:
00712 case PIX_FMT_NV12:
00713 case PIX_FMT_NV21:
00714 if ((c->srcH & 0x1) != 0)
00715 return NULL;
00716
00717 switch(c->dstFormat){
00718 case PIX_FMT_RGB24:
00719 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00720 return altivec_yuv2_rgb24;
00721 case PIX_FMT_BGR24:
00722 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00723 return altivec_yuv2_bgr24;
00724 case PIX_FMT_ARGB:
00725 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00726 return altivec_yuv2_argb;
00727 case PIX_FMT_ABGR:
00728 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00729 return altivec_yuv2_abgr;
00730 case PIX_FMT_RGBA:
00731 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00732 return altivec_yuv2_rgba;
00733 case PIX_FMT_BGRA:
00734 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00735 return altivec_yuv2_bgra;
00736 default: return NULL;
00737 }
00738 break;
00739
00740 case PIX_FMT_UYVY422:
00741 switch(c->dstFormat){
00742 case PIX_FMT_BGR32:
00743 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00744 return altivec_uyvy_rgb32;
00745 default: return NULL;
00746 }
00747 break;
00748
00749 }
00750 return NULL;
00751 }
00752
00753 void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
00754 {
00755 union {
00756 signed short tmp[8] __attribute__ ((aligned(16)));
00757 vector signed short vec;
00758 } buf;
00759
00760 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
00761 buf.tmp[1] = -256*brightness;
00762 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
00763 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
00764 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
00765 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
00766
00767
00768 c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00769 c->CY = vec_splat ((vector signed short)buf.vec, 0);
00770 c->OY = vec_splat ((vector signed short)buf.vec, 1);
00771 c->CRV = vec_splat ((vector signed short)buf.vec, 2);
00772 c->CBU = vec_splat ((vector signed short)buf.vec, 3);
00773 c->CGU = vec_splat ((vector signed short)buf.vec, 4);
00774 c->CGV = vec_splat ((vector signed short)buf.vec, 5);
00775 #if 0
00776 {
00777 int i;
00778 char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
00779 for (i=0; i<6; i++)
00780 printf("%s %d ", v[i],buf.tmp[i] );
00781 printf("\n");
00782 }
00783 #endif
00784 return;
00785 }
00786
00787
00788 void
00789 altivec_yuv2packedX (SwsContext *c,
00790 int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00791 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00792 uint8_t *dest, int dstW, int dstY)
00793 {
00794 int i,j;
00795 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00796 vector signed short R0,G0,B0,R1,G1,B1;
00797
00798 vector unsigned char R,G,B;
00799 vector unsigned char *out,*nout;
00800
00801 vector signed short RND = vec_splat_s16(1<<3);
00802 vector unsigned short SCL = vec_splat_u16(4);
00803 unsigned long scratch[16] __attribute__ ((aligned (16)));
00804
00805 vector signed short *YCoeffs, *CCoeffs;
00806
00807 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00808 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00809
00810 out = (vector unsigned char *)dest;
00811
00812 for (i=0; i<dstW; i+=16){
00813 Y0 = RND;
00814 Y1 = RND;
00815
00816 for (j=0; j<lumFilterSize; j++) {
00817 X0 = vec_ld (0, &lumSrc[j][i]);
00818 X1 = vec_ld (16, &lumSrc[j][i]);
00819 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00820 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00821 }
00822
00823 U = RND;
00824 V = RND;
00825
00826 for (j=0; j<chrFilterSize; j++) {
00827 X = vec_ld (0, &chrSrc[j][i/2]);
00828 U = vec_mradds (X, CCoeffs[j], U);
00829 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00830 V = vec_mradds (X, CCoeffs[j], V);
00831 }
00832
00833
00834 Y0 = vec_sra (Y0, SCL);
00835 Y1 = vec_sra (Y1, SCL);
00836 U = vec_sra (U, SCL);
00837 V = vec_sra (V, SCL);
00838
00839 Y0 = vec_clip_s16 (Y0);
00840 Y1 = vec_clip_s16 (Y1);
00841 U = vec_clip_s16 (U);
00842 V = vec_clip_s16 (V);
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853 U0 = vec_mergeh (U,U);
00854 V0 = vec_mergeh (V,V);
00855
00856 U1 = vec_mergel (U,U);
00857 V1 = vec_mergel (V,V);
00858
00859 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00860 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00861
00862 R = vec_packclp (R0,R1);
00863 G = vec_packclp (G0,G1);
00864 B = vec_packclp (B0,B1);
00865
00866 switch(c->dstFormat) {
00867 case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
00868 case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
00869 case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
00870 case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
00871 case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00872 case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00873 default:
00874 {
00875
00876
00877 static int printed_error_message;
00878 if (!printed_error_message) {
00879 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00880 sws_format_name(c->dstFormat));
00881 printed_error_message=1;
00882 }
00883 return;
00884 }
00885 }
00886 }
00887
00888 if (i < dstW) {
00889 i -= 16;
00890
00891 Y0 = RND;
00892 Y1 = RND;
00893
00894 for (j=0; j<lumFilterSize; j++) {
00895 X0 = vec_ld (0, &lumSrc[j][i]);
00896 X1 = vec_ld (16, &lumSrc[j][i]);
00897 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00898 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00899 }
00900
00901 U = RND;
00902 V = RND;
00903
00904 for (j=0; j<chrFilterSize; j++) {
00905 X = vec_ld (0, &chrSrc[j][i/2]);
00906 U = vec_mradds (X, CCoeffs[j], U);
00907 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00908 V = vec_mradds (X, CCoeffs[j], V);
00909 }
00910
00911
00912 Y0 = vec_sra (Y0, SCL);
00913 Y1 = vec_sra (Y1, SCL);
00914 U = vec_sra (U, SCL);
00915 V = vec_sra (V, SCL);
00916
00917 Y0 = vec_clip_s16 (Y0);
00918 Y1 = vec_clip_s16 (Y1);
00919 U = vec_clip_s16 (U);
00920 V = vec_clip_s16 (V);
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931 U0 = vec_mergeh (U,U);
00932 V0 = vec_mergeh (V,V);
00933
00934 U1 = vec_mergel (U,U);
00935 V1 = vec_mergel (V,V);
00936
00937 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00938 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00939
00940 R = vec_packclp (R0,R1);
00941 G = vec_packclp (G0,G1);
00942 B = vec_packclp (B0,B1);
00943
00944 nout = (vector unsigned char *)scratch;
00945 switch(c->dstFormat) {
00946 case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
00947 case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
00948 case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
00949 case PIX_FMT_ARGB: out_argb (R,G,B,nout); break;
00950 case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00951 case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00952 default:
00953
00954 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00955 sws_format_name(c->dstFormat));
00956 return;
00957 }
00958
00959 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00960 }
00961
00962 }