00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093
00094 #include "config.h"
00095 #include "libswscale/rgb2rgb.h"
00096 #include "libswscale/swscale.h"
00097 #include "libswscale/swscale_internal.h"
00098 #include "libavutil/attributes.h"
00099 #include "libavutil/cpu.h"
00100 #include "libavutil/pixdesc.h"
00101 #include "yuv2rgb_altivec.h"
00102
00103 #undef PROFILE_THE_BEAST
00104 #undef INC_SCALING
00105
/* Byte-sized shorthand types. */
typedef unsigned char ubyte;   /* unsigned 8-bit value */
typedef signed char   sbyte;   /* signed 8-bit value   */
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145 static const vector unsigned char
00146 perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05,
00147 0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a },
00148 perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17,
00149 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
00150 perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
00151 0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 },
00152 perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a,
00153 0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f };
00154
00155 #define vec_merge3(x2, x1, x0, y0, y1, y2) \
00156 do { \
00157 __typeof__(x0) o0, o2, o3; \
00158 o0 = vec_mergeh(x0, x1); \
00159 y0 = vec_perm(o0, x2, perm_rgb_0); \
00160 o2 = vec_perm(o0, x2, perm_rgb_1); \
00161 o3 = vec_mergel(x0, x1); \
00162 y1 = vec_perm(o3, o2, perm_rgb_2); \
00163 y2 = vec_perm(o3, o2, perm_rgb_3); \
00164 } while (0)
00165
00166 #define vec_mstbgr24(x0, x1, x2, ptr) \
00167 do { \
00168 __typeof__(x0) _0, _1, _2; \
00169 vec_merge3(x0, x1, x2, _0, _1, _2); \
00170 vec_st(_0, 0, ptr++); \
00171 vec_st(_1, 0, ptr++); \
00172 vec_st(_2, 0, ptr++); \
00173 } while (0)
00174
00175 #define vec_mstrgb24(x0, x1, x2, ptr) \
00176 do { \
00177 __typeof__(x0) _0, _1, _2; \
00178 vec_merge3(x2, x1, x0, _0, _1, _2); \
00179 vec_st(_0, 0, ptr++); \
00180 vec_st(_1, 0, ptr++); \
00181 vec_st(_2, 0, ptr++); \
00182 } while (0)
00183
00184
00185
00186
00187
/*
 * Store 16 pixels of four bytes each at ptr, memory order x0, x1, x2, x3 per
 * pixel, then advance ptr (a vector pointer) by the four vectors written.
 * T is the vector type of the channel operands.
 */
#define vec_mstrgb32(T, x0, x1, x2, x3, ptr)                                  \
    do {                                                                      \
        T st32_ab_, st32_cd_;                                                 \
        /* elements 0..7: byte pairs first, then 16-bit merges form pixels */ \
        st32_ab_ = vec_mergeh(x0, x1);                                        \
        st32_cd_ = vec_mergeh(x2, x3);                                        \
        vec_st((T) vec_mergeh((vector unsigned short) st32_ab_,               \
                              (vector unsigned short) st32_cd_),              \
               0, (T *) ptr);                                                 \
        vec_st((T) vec_mergel((vector unsigned short) st32_ab_,               \
                              (vector unsigned short) st32_cd_),              \
               16, (T *) ptr);                                                \
        /* elements 8..15 */                                                  \
        st32_ab_ = vec_mergel(x0, x1);                                        \
        st32_cd_ = vec_mergel(x2, x3);                                        \
        vec_st((T) vec_mergeh((vector unsigned short) st32_ab_,               \
                              (vector unsigned short) st32_cd_),              \
               32, (T *) ptr);                                                \
        vec_st((T) vec_mergel((vector unsigned short) st32_ab_,               \
                              (vector unsigned short) st32_cd_),              \
               48, (T *) ptr);                                                \
        ptr += 4;                                                             \
    } while (0)
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
/*
 * Zero-extend bytes 0..7 of x to eight 16-bit lanes: each output lane is a
 * zero byte followed by the corresponding byte of x.
 */
#define vec_unh(x)                                                            \
    ((vector signed short) vec_mergeh((__typeof__(x)) { 0 }, x))

/* Same as vec_unh(), for bytes 8..15 of x. */
#define vec_unl(x)                                                            \
    ((vector signed short) vec_mergel((__typeof__(x)) { 0 }, x))

/* Clamp every signed 16-bit lane of x to the range [16, 235]. */
#define vec_clip_s16(x)                                                       \
    vec_max(vec_min(x, vec_splat((vector signed short) { 235 }, 0)),          \
            vec_splat((vector signed short) { 16 }, 0))

/*
 * Pack two vectors of signed 16-bit lanes into 16 unsigned bytes.  Negative
 * lanes are raised to zero first; vec_packs() on the unsigned view then
 * saturates the upper end.
 */
#define vec_packclp(x, y)                                                     \
    ((vector unsigned char)                                                   \
     vec_packs((vector unsigned short)                                        \
                   vec_max(x, ((vector signed short) { 0 })),                 \
               (vector unsigned short)                                        \
                   vec_max(y, ((vector signed short) { 0 }))))
00248
00249
00250
00251 static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
00252 vector signed short U, vector signed short V,
00253 vector signed short *R, vector signed short *G,
00254 vector signed short *B)
00255 {
00256 vector signed short vx, ux, uvx;
00257
00258 Y = vec_mradds(Y, c->CY, c->OY);
00259 U = vec_sub(U, (vector signed short)
00260 vec_splat((vector signed short) { 128 }, 0));
00261 V = vec_sub(V, (vector signed short)
00262 vec_splat((vector signed short) { 128 }, 0));
00263
00264
00265 ux = vec_sl(U, c->CSHIFT);
00266 *B = vec_mradds(ux, c->CBU, Y);
00267
00268
00269 vx = vec_sl(V, c->CSHIFT);
00270 *R = vec_mradds(vx, c->CRV, Y);
00271
00272
00273 uvx = vec_mradds(U, c->CGU, Y);
00274 *G = vec_mradds(V, c->CGV, uvx);
00275 }
00276
00277
00278
00279
00280
00281
00282
/*
 * DEFCSP420_CVT(name, out_pixels) defines altivec_<name>(), a converter from
 * three separate planes (in[0] luma, in[1] and in[2] chroma) to packed pixels.
 *
 * Two luma rows are handled per outer iteration and share one chroma row;
 * the inner loop consumes 16 luma bytes of each row and 8 bytes of each
 * chroma plane per step.  out_pixels(R, G, B, ptr) must store the 16 packed
 * pixels at ptr and advance ptr.  The function returns srcSliceH.
 */
#define DEFCSP420_CVT(name, out_pixels)                                       \
static int altivec_ ## name(SwsContext *c, const unsigned char **in,          \
                            int *instrides, int srcSliceY, int srcSliceH,     \
                            unsigned char **oplanes, int *outstrides)         \
{                                                                             \
    const int width     = c->srcW;                                            \
    const int row_pairs = srcSliceH / 2;                                      \
    const int groups    = width / 16;                                         \
                                                                              \
    /* distance from the end of the consumed samples to the next row(s) */    \
    const int luma_skip = instrides[0] * 2 - width;                           \
    const int cb_skip   = instrides[1] - width / 2;                           \
    const int cr_skip   = instrides[2] - width / 2;                           \
                                                                              \
    /* control word passed to vec_dstst() for both destination rows */        \
    const int stream_ctl = 0x02000002 | (((width * 3 + 32) / 32) << 16);      \
                                                                              \
    /* conversion coefficients, copied out of the context once */             \
    const vector signed short   coef_y  = c->CY;                              \
    const vector signed short   off_y   = c->OY;                              \
    const vector signed short   coef_rv = c->CRV;                             \
    const vector signed short   coef_bu = c->CBU;                             \
    const vector signed short   coef_gu = c->CGU;                             \
    const vector signed short   coef_gv = c->CGV;                             \
    const vector unsigned short shift   = c->CSHIFT;                          \
                                                                              \
    const vector signed short zero16 = (vector signed short) { 0 };           \
    const vector signed char  bias8  =                                        \
        (vector signed char) vec_splat((vector signed char) { 128 }, 0);      \
                                                                              \
    const ubyte *top = in[0];                                                 \
    const ubyte *bot = in[0] + instrides[0];                                  \
    const ubyte *cb  = in[1];                                                 \
    const ubyte *cr  = in[2];                                                 \
                                                                              \
    vector unsigned char *dst_top, *dst_bot;                                  \
    int row, col;                                                             \
                                                                              \
    for (row = 0; row < row_pairs; row++) {                                   \
        dst_top = (vector unsigned char *)(oplanes[0] + outstrides[0] *       \
                                           (srcSliceY + row * 2));            \
        dst_bot = dst_top + (outstrides[0] >> 4);                             \
        vec_dstst(dst_bot, stream_ctl, 0);                                    \
        vec_dstst(dst_top, stream_ctl, 1);                                    \
                                                                              \
        for (col = 0; col < groups; col++) {                                  \
            const vector unsigned char *top_v, *bot_v, *cb_v, *cr_v;          \
            vector unsigned char luma_top, luma_bot;                          \
            vector signed char cb8, cr8;                                      \
            vector signed short Cb, Cr;                                       \
            vector signed short y_top_a, y_top_b, y_bot_a, y_bot_b;           \
            vector signed short d_red, d_green, d_blue;                       \
            vector signed short d_red_a, d_red_b;                             \
            vector signed short d_green_a, d_green_b;                         \
            vector signed short d_blue_a, d_blue_b;                           \
            vector unsigned char R, G, B;                                     \
                                                                              \
            top_v = (const vector unsigned char *) top;                       \
            bot_v = (const vector unsigned char *) bot;                       \
            cb_v  = (const vector unsigned char *) cb;                        \
            cr_v  = (const vector unsigned char *) cr;                        \
                                                                              \
            /* fetch 16 bytes per source pointer: two adjacent vectors */     \
            /* combined through the vec_lvsl() permute of that pointer */     \
            luma_top = (vector unsigned char)                                 \
                       vec_perm(top_v[0], top_v[1], vec_lvsl(0, top));        \
            luma_bot = (vector unsigned char)                                 \
                       vec_perm(bot_v[0], bot_v[1], vec_lvsl(0, bot));        \
            cb8      = (vector signed char)                                   \
                       vec_perm(cb_v[0], cb_v[1], vec_lvsl(0, cb));           \
            cr8      = (vector signed char)                                   \
                       vec_perm(cr_v[0], cr_v[1], vec_lvsl(0, cr));           \
                                                                              \
            /* centre chroma, then widen its first 8 bytes to 16 bits */      \
            cb8 = (vector signed char) vec_sub(cb8, bias8);                   \
            cr8 = (vector signed char) vec_sub(cr8, bias8);                   \
            Cb  = vec_unpackh(cb8);                                           \
            Cr  = vec_unpackh(cr8);                                           \
                                                                              \
            /* chroma contributions, each value duplicated for the two */     \
            /* horizontally adjacent pixels that share it */                  \
            d_blue   = vec_mradds(vec_sl(Cb, shift), coef_bu, zero16);        \
            d_blue_a = vec_mergeh(d_blue, d_blue);                            \
            d_blue_b = vec_mergel(d_blue, d_blue);                            \
                                                                              \
            d_red   = vec_mradds(vec_sl(Cr, shift), coef_rv, zero16);         \
            d_red_a = vec_mergeh(d_red, d_red);                               \
            d_red_b = vec_mergel(d_red, d_red);                               \
                                                                              \
            d_green   = vec_mradds(Cb, coef_gu, zero16);                      \
            d_green   = vec_mradds(Cr, coef_gv, d_green);                     \
            d_green_a = vec_mergeh(d_green, d_green);                         \
            d_green_b = vec_mergel(d_green, d_green);                         \
                                                                              \
            /* widen luma to 16 bits and apply scale and offset */            \
            y_top_a = vec_mradds(vec_unh(luma_top), coef_y, off_y);           \
            y_top_b = vec_mradds(vec_unl(luma_top), coef_y, off_y);           \
            y_bot_a = vec_mradds(vec_unh(luma_bot), coef_y, off_y);           \
            y_bot_b = vec_mradds(vec_unl(luma_bot), coef_y, off_y);           \
                                                                              \
            /* first row of the pair */                                       \
            R = vec_packclp(vec_add(y_top_a, d_red_a),                        \
                            vec_add(y_top_b, d_red_b));                       \
            G = vec_packclp(vec_add(y_top_a, d_green_a),                      \
                            vec_add(y_top_b, d_green_b));                     \
            B = vec_packclp(vec_add(y_top_a, d_blue_a),                       \
                            vec_add(y_top_b, d_blue_b));                      \
            out_pixels(R, G, B, dst_top);                                     \
                                                                              \
            /* second row of the pair, same chroma */                         \
            R = vec_packclp(vec_add(y_bot_a, d_red_a),                        \
                            vec_add(y_bot_b, d_red_b));                       \
            G = vec_packclp(vec_add(y_bot_a, d_green_a),                      \
                            vec_add(y_bot_b, d_green_b));                     \
            B = vec_packclp(vec_add(y_bot_a, d_blue_a),                       \
                            vec_add(y_bot_b, d_blue_b));                      \
            out_pixels(R, G, B, dst_bot);                                     \
                                                                              \
            top += 16;                                                        \
            bot += 16;                                                        \
            cb  += 8;                                                         \
            cr  += 8;                                                         \
        }                                                                     \
                                                                              \
        /* move all source pointers to the start of the next row pair */      \
        cb  += cb_skip;                                                       \
        cr  += cr_skip;                                                       \
        top += luma_skip;                                                     \
        bot += luma_skip;                                                     \
    }                                                                         \
    return srcSliceH;                                                         \
}
00438
00439 #define out_abgr(a, b, c, ptr) \
00440 vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr)
00441 #define out_bgra(a, b, c, ptr) \
00442 vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr)
00443 #define out_rgba(a, b, c, ptr) \
00444 vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr)
00445 #define out_argb(a, b, c, ptr) \
00446 vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr)
00447 #define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr)
00448 #define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr)
00449
00450 DEFCSP420_CVT(yuv2_abgr, out_abgr)
00451 DEFCSP420_CVT(yuv2_bgra, out_bgra)
00452 DEFCSP420_CVT(yuv2_rgba, out_rgba)
00453 DEFCSP420_CVT(yuv2_argb, out_argb)
00454 DEFCSP420_CVT(yuv2_rgb24, out_rgb24)
00455 DEFCSP420_CVT(yuv2_bgr24, out_bgr24)
00456
00457
00458
00459 static const vector unsigned char
00460 demux_u = { 0x10, 0x00, 0x10, 0x00,
00461 0x10, 0x04, 0x10, 0x04,
00462 0x10, 0x08, 0x10, 0x08,
00463 0x10, 0x0c, 0x10, 0x0c },
00464 demux_v = { 0x10, 0x02, 0x10, 0x02,
00465 0x10, 0x06, 0x10, 0x06,
00466 0x10, 0x0A, 0x10, 0x0A,
00467 0x10, 0x0E, 0x10, 0x0E },
00468 demux_y = { 0x10, 0x01, 0x10, 0x03,
00469 0x10, 0x05, 0x10, 0x07,
00470 0x10, 0x09, 0x10, 0x0B,
00471 0x10, 0x0D, 0x10, 0x0F };
00472
00473
00474
00475
00476 static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in,
00477 int *instrides, int srcSliceY, int srcSliceH,
00478 unsigned char **oplanes, int *outstrides)
00479 {
00480 int w = c->srcW;
00481 int h = srcSliceH;
00482 int i, j;
00483 vector unsigned char uyvy;
00484 vector signed short Y, U, V;
00485 vector signed short R0, G0, B0, R1, G1, B1;
00486 vector unsigned char R, G, B;
00487 vector unsigned char *out;
00488 const ubyte *img;
00489
00490 img = in[0];
00491 out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]);
00492
00493 for (i = 0; i < h; i++)
00494 for (j = 0; j < w / 16; j++) {
00495 uyvy = vec_ld(0, img);
00496
00497 U = (vector signed short)
00498 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
00499 V = (vector signed short)
00500 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
00501 Y = (vector signed short)
00502 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
00503
00504 cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0);
00505
00506 uyvy = vec_ld(16, img);
00507
00508 U = (vector signed short)
00509 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
00510 V = (vector signed short)
00511 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
00512 Y = (vector signed short)
00513 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
00514
00515 cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1);
00516
00517 R = vec_packclp(R0, R1);
00518 G = vec_packclp(G0, G1);
00519 B = vec_packclp(B0, B1);
00520
00521
00522 out_rgba(R, G, B, out);
00523
00524 img += 32;
00525 }
00526 return srcSliceH;
00527 }
00528
00529
00530
00531
00532
00533
00534
00535 av_cold SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00536 {
00537 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
00538 return NULL;
00539
00540
00541
00542
00543
00544
00545
00546
00547 if ((c->srcW & 0xf) != 0)
00548 return NULL;
00549
00550 switch (c->srcFormat) {
00551 case PIX_FMT_YUV410P:
00552 case PIX_FMT_YUV420P:
00553
00554 case PIX_FMT_GRAY8:
00555 case PIX_FMT_NV12:
00556 case PIX_FMT_NV21:
00557 if ((c->srcH & 0x1) != 0)
00558 return NULL;
00559
00560 switch (c->dstFormat) {
00561 case PIX_FMT_RGB24:
00562 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00563 return altivec_yuv2_rgb24;
00564 case PIX_FMT_BGR24:
00565 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00566 return altivec_yuv2_bgr24;
00567 case PIX_FMT_ARGB:
00568 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00569 return altivec_yuv2_argb;
00570 case PIX_FMT_ABGR:
00571 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00572 return altivec_yuv2_abgr;
00573 case PIX_FMT_RGBA:
00574 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00575 return altivec_yuv2_rgba;
00576 case PIX_FMT_BGRA:
00577 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00578 return altivec_yuv2_bgra;
00579 default: return NULL;
00580 }
00581 break;
00582
00583 case PIX_FMT_UYVY422:
00584 switch (c->dstFormat) {
00585 case PIX_FMT_BGR32:
00586 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00587 return altivec_uyvy_rgb32;
00588 default: return NULL;
00589 }
00590 break;
00591 }
00592 return NULL;
00593 }
00594
00595 av_cold void ff_yuv2rgb_init_tables_altivec(SwsContext *c,
00596 const int inv_table[4],
00597 int brightness,
00598 int contrast,
00599 int saturation)
00600 {
00601 union {
00602 DECLARE_ALIGNED(16, signed short, tmp)[8];
00603 vector signed short vec;
00604 } buf;
00605
00606 buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9;
00607 buf.tmp[1] = -256 * brightness;
00608 buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16);
00609 buf.tmp[3] = (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16);
00610 buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16));
00611 buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16));
00612
00613 c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
00614 c->CY = vec_splat((vector signed short) buf.vec, 0);
00615 c->OY = vec_splat((vector signed short) buf.vec, 1);
00616 c->CRV = vec_splat((vector signed short) buf.vec, 2);
00617 c->CBU = vec_splat((vector signed short) buf.vec, 3);
00618 c->CGU = vec_splat((vector signed short) buf.vec, 4);
00619 c->CGV = vec_splat((vector signed short) buf.vec, 5);
00620 return;
00621 }
00622
00623 static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c,
00624 const int16_t *lumFilter,
00625 const int16_t **lumSrc,
00626 int lumFilterSize,
00627 const int16_t *chrFilter,
00628 const int16_t **chrUSrc,
00629 const int16_t **chrVSrc,
00630 int chrFilterSize,
00631 const int16_t **alpSrc,
00632 uint8_t *dest,
00633 int dstW, int dstY,
00634 enum PixelFormat target)
00635 {
00636 int i, j;
00637 vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
00638 vector signed short R0, G0, B0, R1, G1, B1;
00639
00640 vector unsigned char R, G, B;
00641 vector unsigned char *out, *nout;
00642
00643 vector signed short RND = vec_splat_s16(1 << 3);
00644 vector unsigned short SCL = vec_splat_u16(4);
00645 DECLARE_ALIGNED(16, unsigned int, scratch)[16];
00646
00647 vector signed short *YCoeffs, *CCoeffs;
00648
00649 YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
00650 CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
00651
00652 out = (vector unsigned char *) dest;
00653
00654 for (i = 0; i < dstW; i += 16) {
00655 Y0 = RND;
00656 Y1 = RND;
00657
00658 for (j = 0; j < lumFilterSize; j++) {
00659 X0 = vec_ld(0, &lumSrc[j][i]);
00660 X1 = vec_ld(16, &lumSrc[j][i]);
00661 Y0 = vec_mradds(X0, YCoeffs[j], Y0);
00662 Y1 = vec_mradds(X1, YCoeffs[j], Y1);
00663 }
00664
00665 U = RND;
00666 V = RND;
00667
00668 for (j = 0; j < chrFilterSize; j++) {
00669 X = vec_ld(0, &chrUSrc[j][i / 2]);
00670 U = vec_mradds(X, CCoeffs[j], U);
00671 X = vec_ld(0, &chrVSrc[j][i / 2]);
00672 V = vec_mradds(X, CCoeffs[j], V);
00673 }
00674
00675
00676 Y0 = vec_sra(Y0, SCL);
00677 Y1 = vec_sra(Y1, SCL);
00678 U = vec_sra(U, SCL);
00679 V = vec_sra(V, SCL);
00680
00681 Y0 = vec_clip_s16(Y0);
00682 Y1 = vec_clip_s16(Y1);
00683 U = vec_clip_s16(U);
00684 V = vec_clip_s16(V);
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695 U0 = vec_mergeh(U, U);
00696 V0 = vec_mergeh(V, V);
00697
00698 U1 = vec_mergel(U, U);
00699 V1 = vec_mergel(V, V);
00700
00701 cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
00702 cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
00703
00704 R = vec_packclp(R0, R1);
00705 G = vec_packclp(G0, G1);
00706 B = vec_packclp(B0, B1);
00707
00708 switch (target) {
00709 case PIX_FMT_ABGR:
00710 out_abgr(R, G, B, out);
00711 break;
00712 case PIX_FMT_BGRA:
00713 out_bgra(R, G, B, out);
00714 break;
00715 case PIX_FMT_RGBA:
00716 out_rgba(R, G, B, out);
00717 break;
00718 case PIX_FMT_ARGB:
00719 out_argb(R, G, B, out);
00720 break;
00721 case PIX_FMT_RGB24:
00722 out_rgb24(R, G, B, out);
00723 break;
00724 case PIX_FMT_BGR24:
00725 out_bgr24(R, G, B, out);
00726 break;
00727 default:
00728 {
00729
00730
00731 static int printed_error_message;
00732 if (!printed_error_message) {
00733 av_log(c, AV_LOG_ERROR,
00734 "altivec_yuv2packedX doesn't support %s output\n",
00735 av_get_pix_fmt_name(c->dstFormat));
00736 printed_error_message = 1;
00737 }
00738 return;
00739 }
00740 }
00741 }
00742
00743 if (i < dstW) {
00744 i -= 16;
00745
00746 Y0 = RND;
00747 Y1 = RND;
00748
00749 for (j = 0; j < lumFilterSize; j++) {
00750 X0 = vec_ld(0, &lumSrc[j][i]);
00751 X1 = vec_ld(16, &lumSrc[j][i]);
00752 Y0 = vec_mradds(X0, YCoeffs[j], Y0);
00753 Y1 = vec_mradds(X1, YCoeffs[j], Y1);
00754 }
00755
00756 U = RND;
00757 V = RND;
00758
00759 for (j = 0; j < chrFilterSize; j++) {
00760 X = vec_ld(0, &chrUSrc[j][i / 2]);
00761 U = vec_mradds(X, CCoeffs[j], U);
00762 X = vec_ld(0, &chrVSrc[j][i / 2]);
00763 V = vec_mradds(X, CCoeffs[j], V);
00764 }
00765
00766
00767 Y0 = vec_sra(Y0, SCL);
00768 Y1 = vec_sra(Y1, SCL);
00769 U = vec_sra(U, SCL);
00770 V = vec_sra(V, SCL);
00771
00772 Y0 = vec_clip_s16(Y0);
00773 Y1 = vec_clip_s16(Y1);
00774 U = vec_clip_s16(U);
00775 V = vec_clip_s16(V);
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786 U0 = vec_mergeh(U, U);
00787 V0 = vec_mergeh(V, V);
00788
00789 U1 = vec_mergel(U, U);
00790 V1 = vec_mergel(V, V);
00791
00792 cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
00793 cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
00794
00795 R = vec_packclp(R0, R1);
00796 G = vec_packclp(G0, G1);
00797 B = vec_packclp(B0, B1);
00798
00799 nout = (vector unsigned char *) scratch;
00800 switch (target) {
00801 case PIX_FMT_ABGR:
00802 out_abgr(R, G, B, nout);
00803 break;
00804 case PIX_FMT_BGRA:
00805 out_bgra(R, G, B, nout);
00806 break;
00807 case PIX_FMT_RGBA:
00808 out_rgba(R, G, B, nout);
00809 break;
00810 case PIX_FMT_ARGB:
00811 out_argb(R, G, B, nout);
00812 break;
00813 case PIX_FMT_RGB24:
00814 out_rgb24(R, G, B, nout);
00815 break;
00816 case PIX_FMT_BGR24:
00817 out_bgr24(R, G, B, nout);
00818 break;
00819 default:
00820
00821 av_log(c, AV_LOG_ERROR,
00822 "altivec_yuv2packedX doesn't support %s output\n",
00823 av_get_pix_fmt_name(c->dstFormat));
00824 return;
00825 }
00826
00827 memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) / 4);
00828 }
00829 }
00830
00831 #define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
00832 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \
00833 const int16_t *lumFilter, \
00834 const int16_t **lumSrc, \
00835 int lumFilterSize, \
00836 const int16_t *chrFilter, \
00837 const int16_t **chrUSrc, \
00838 const int16_t **chrVSrc, \
00839 int chrFilterSize, \
00840 const int16_t **alpSrc, \
00841 uint8_t *dest, int dstW, int dstY) \
00842 { \
00843 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
00844 chrFilter, chrUSrc, chrVSrc, \
00845 chrFilterSize, alpSrc, \
00846 dest, dstW, dstY, pixfmt); \
00847 }
00848
00849 YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
00850 YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
00851 YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
00852 YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
00853 YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
00854 YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);