[FFmpeg-devel] [PATCH] Use larger tables for yuv > 8 bit to RGB conversion.
Reimar Döffinger
Reimar.Doeffinger at gmx.de
Sat Nov 9 20:56:37 CET 2013
This should allow for fairly precise YUV16 to RGB48 conversion
for example.
However I believe that this specific implementation is not as accurate
as it could/should be, i.e. the table generation might be buggy.
In addition it makes the scaled yuv->rgb conversion slightly slower, though also
more precise for 9- and 10-bit input.
Still not sure it is a good idea.
---
libswscale/output.c | 44 +++++-----
libswscale/swscale_internal.h | 11 +--
libswscale/swscale_unscaled.c | 1 -
libswscale/yuv2rgb.c | 195 ++++++++++++++++++++----------------------
libswscale/yuv2rgb_template.c | 28 +++---
5 files changed, 135 insertions(+), 144 deletions(-)
diff --git a/libswscale/output.c b/libswscale/output.c
index ddb0d0c..d862510 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1254,13 +1254,14 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
int y, enum AVPixelFormat target, int hasAlpha)
{
int i;
+ int shift = 27 - c->yuvtable_bits;
for (i = 0; i < ((dstW + 1) >> 1); i++) {
int j, A1, A2;
- int Y1 = 1 << 18;
- int Y2 = 1 << 18;
- int U = 1 << 18;
- int V = 1 << 18;
+ int Y1 = 1 << (shift - 1);
+ int Y2 = 1 << (shift - 1);
+ int U = 1 << (shift - 1);
+ int V = 1 << (shift - 1);
const void *r, *g, *b;
for (j = 0; j < lumFilterSize; j++) {
@@ -1271,10 +1272,10 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
U += chrUSrc[j][i] * chrFilter[j];
V += chrVSrc[j][i] * chrFilter[j];
}
- Y1 >>= 19;
- Y2 >>= 19;
- U >>= 19;
- V >>= 19;
+ Y1 >>= shift;
+ Y2 >>= shift;
+ U >>= shift;
+ V >>= shift;
if (hasAlpha) {
A1 = 1 << 18;
A2 = 1 << 18;
@@ -1306,6 +1307,7 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
int yalpha, int uvalpha, int y,
enum AVPixelFormat target, int hasAlpha)
{
+ int shift = 27 - c->yuvtable_bits;
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
@@ -1316,10 +1318,10 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
int i;
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
- int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
- int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
- int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
+ int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> shift;
+ int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> shift;
+ int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> shift;
+ int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> shift;
int A1, A2;
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
@@ -1344,15 +1346,17 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
int uvalpha, int y, enum AVPixelFormat target,
int hasAlpha)
{
+ int shift = 15 - c->yuvtable_bits;
+ int round = 1 << (shift - 1);
const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = (buf0[i * 2 ] + 64) >> 7;
- int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
- int U = (ubuf0[i] + 64) >> 7;
- int V = (vbuf0[i] + 64) >> 7;
+ int Y1 = (buf0[i * 2 ] + round) >> shift;
+ int Y2 = (buf0[i * 2 + 1] + round) >> shift;
+ int U = (ubuf0[i] + round) >> shift;
+ int V = (vbuf0[i] + round) >> shift;
int A1, A2;
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
@@ -1371,10 +1375,10 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = (buf0[i * 2 ] + 64) >> 7;
- int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
- int U = (ubuf0[i] + ubuf1[i] + 128) >> 8;
- int V = (vbuf0[i] + vbuf1[i] + 128) >> 8;
+ int Y1 = (buf0[i * 2 ] + round) >> shift;
+ int Y2 = (buf0[i * 2 + 1] + round) >> shift;
+ int U = (ubuf0[i] + ubuf1[i] + 2*round) >> (shift + 1);
+ int V = (vbuf0[i] + vbuf1[i] + 2*round) >> (shift + 1);
int A1, A2;
const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
*g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 6ad278e..2a93f6f 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -37,7 +37,7 @@
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
-#define YUVRGB_TABLE_HEADROOM 128
+#define YUVRGB_TABLE_HEADROOM 512
#define MAX_FILTER_SIZE 256
@@ -362,12 +362,13 @@ typedef struct SwsContext {
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ int yuvtable_bits;
// alignment ensures the offset can be added in a single
// instruction on e.g. ARM
- DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ DECLARE_ALIGNED(16, int, table_gV)[1024 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_rV[1024 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[1024 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[1024 + 2*YUVRGB_TABLE_HEADROOM];
DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, teh C vales are always at the XY_IDX points
#define RY_IDX 0
#define GY_IDX 1
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 8842f35..e96b12d 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1219,7 +1219,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
srcFormat == AV_PIX_FMT_YUV420P9 || srcFormat == AV_PIX_FMT_YUV422P9 ||
srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUV422P10 ||
- srcFormat == AV_PIX_FMT_YUV420P16 || srcFormat == AV_PIX_FMT_YUV422P16 ||
srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
!(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
c->swscale = ff_yuv2rgb_get_func_ptr(c);
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 28de37e..1d23aca 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -54,60 +54,60 @@ const int *sws_getCoefficients(int colorspace)
}
#define LOADCHROMA(i) \
- U = pu[i] >> shift; \
- V = pv[i] >> shift; \
- r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
- g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
+ U = pu[i]; \
+ V = pv[i]; \
+ r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
+ g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
#define PUTRGB(dst, src, i) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y] + g[Y] + b[Y]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
#define PUTRGB24(dst, src, i) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[6 * i + 0] = r[Y]; \
dst[6 * i + 1] = g[Y]; \
dst[6 * i + 2] = b[Y]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[6 * i + 3] = r[Y]; \
dst[6 * i + 4] = g[Y]; \
dst[6 * i + 5] = b[Y];
#define PUTBGR24(dst, src, i) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[6 * i + 0] = b[Y]; \
dst[6 * i + 1] = g[Y]; \
dst[6 * i + 2] = r[Y]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[6 * i + 3] = b[Y]; \
dst[6 * i + 4] = g[Y]; \
dst[6 * i + 5] = r[Y];
#define PUTRGBA(dst, ysrc, asrc, i, s) \
- Y = ysrc[2 * i] >> shift; \
- dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] >> shift << s); \
- Y = ysrc[2 * i + 1] >> shift; \
- dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] >> shift << s);
+ Y = ysrc[2 * i]; \
+ dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << s); \
+ Y = ysrc[2 * i + 1]; \
+ dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
#define PUTRGB48(dst, src, i) \
- Y = src[ 2 * i] >> shift; \
+ Y = src[ 2 * i]; \
dst[12 * i + 0] = dst[12 * i + 1] = r[Y]; \
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \
dst[12 * i + 4] = dst[12 * i + 5] = b[Y]; \
- Y = src[ 2 * i + 1] >> shift; \
+ Y = src[ 2 * i + 1]; \
dst[12 * i + 6] = dst[12 * i + 7] = r[Y]; \
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \
dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
#define PUTBGR48(dst, src, i) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[12 * i + 0] = dst[12 * i + 1] = b[Y]; \
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \
dst[12 * i + 4] = dst[12 * i + 5] = r[Y]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[12 * i + 6] = dst[12 * i + 7] = b[Y]; \
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \
dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
@@ -164,35 +164,15 @@ const int *sws_getCoefficients(int colorspace)
ENDYUV2RGBFUNC()
#define src_type const uint8_t
-#define shift 0
#define suffix(a) a
#include "yuv2rgb_template.c"
#undef src_type
-#undef shift
#undef suffix
#define src_type const uint16_t
-#define shift 1
-#define suffix(a) a##9
-#include "yuv2rgb_template.c"
-#undef src_type
-#undef shift
-#undef suffix
-
-#define src_type const uint16_t
-#define shift 2
-#define suffix(a) a##10
-#include "yuv2rgb_template.c"
-#undef src_type
-#undef shift
-#undef suffix
-
-#define src_type const uint16_t
-#define shift 8
#define suffix(a) a##16
#include "yuv2rgb_template.c"
#undef src_type
-#undef shift
#undef suffix
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
@@ -216,8 +196,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
"No accelerated colorspace conversion found from %s to %s.\n",
av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
-#define SELECT(n) \
- (bits == 16 ? n##16 : bits == 10 ? n##10 : bits == 9 ? n##9 : n)
+#define SELECT(n) (bits > 8 ? n##16 : n)
switch (c->dstFormat) {
case AV_PIX_FMT_BGR48BE:
@@ -261,27 +240,29 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return NULL;
}
-static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize,
+static void fill_table(uint8_t **table, int bits, const int elemsize,
const int64_t inc, void *y_tab)
{
int i;
uint8_t *y_table = y_tab;
+ int count = 1 << bits;
- y_table -= elemsize * (inc >> 9);
+ y_table -= elemsize * (inc >> (17 - bits));
- for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
- int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
+ for (i = 0; i < count + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, count-1)*inc;
table[i] = y_table + elemsize * (cb >> 16);
}
}
-static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int64_t inc)
+static void fill_gv_table(int *table, int bits, const int elemsize, const int64_t inc)
{
int i;
- int off = -(inc >> 9);
+ int off = -(inc >> (17 - bits));
+ int count = 1 << bits;
- for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
- int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
+ for (i = 0; i < count + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, count-1)*inc;
table[i] = elemsize * (off + (cb >> 16));
}
}
@@ -302,6 +283,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
int fullRange, int brightness,
int contrast, int saturation)
{
+ int bits = av_clip(av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1 + 1, 8, 10);
+ int table_scale = (1 << bits) >> 8;
const int isRgb = c->dstFormat == AV_PIX_FMT_RGB32 ||
c->dstFormat == AV_PIX_FMT_RGB32_1 ||
c->dstFormat == AV_PIX_FMT_BGR24 ||
@@ -326,7 +309,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
uint16_t *y_table16;
uint32_t *y_table32;
int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha;
- const int yoffs = fullRange ? 384 : 326;
+ int yoffs = fullRange ? 384 : 326;
int64_t crv = inv_table[0];
int64_t cbu = inv_table[1];
@@ -375,117 +358,121 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1);
cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1);
+ yoffs *= table_scale;
+ cy /= table_scale;
+
av_freep(&c->yuvTable);
+ c->yuvtable_bits = bits;
switch (bpp) {
case 1:
- c->yuvTable = av_malloc(1024);
+ c->yuvTable = av_malloc(table_scale * 1024);
y_table = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024 - 110; i++) {
- y_table[i + 110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
+ for (i = 0; i < table_scale * (1024 - 110); i++) {
+ y_table[i + table_scale * 110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
yb += cy;
}
- fill_table(c->table_gU, 1, cgu, y_table + yoffs);
- fill_gv_table(c->table_gV, 1, cgv);
+ fill_table(c->table_gU, bits, 1, cgu, y_table + yoffs);
+ fill_gv_table(c->table_gV, bits, 1, cgv);
break;
case 4:
case 4 | 128:
rbase = isRgb ? 3 : 0;
gbase = 1;
bbase = isRgb ? 0 : 3;
- c->yuvTable = av_malloc(1024 * 3);
+ c->yuvTable = av_malloc(table_scale * 1024 * 3);
y_table = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024 - 110; i++) {
+ for (i = 0; i < table_scale * (1024 - 110); i++) {
int yval = av_clip_uint8((yb + 0x8000) >> 16);
- y_table[i + 110] = (yval >> 7) << rbase;
- y_table[i + 37 + 1024] = ((yval + 43) / 85) << gbase;
- y_table[i + 110 + 2048] = (yval >> 7) << bbase;
+ y_table[i + table_scale * 110] = (yval >> 7) << rbase;
+ y_table[i + table_scale * 37 + table_scale * 1024] = ((yval + 43) / 85) << gbase;
+ y_table[i + table_scale * 110 + table_scale * 2048] = (yval >> 7) << bbase;
yb += cy;
}
- fill_table(c->table_rV, 1, crv, y_table + yoffs);
- fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
- fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
- fill_gv_table(c->table_gV, 1, cgv);
+ fill_table(c->table_rV, bits, 1, crv, y_table + yoffs);
+ fill_table(c->table_gU, bits, 1, cgu, y_table + yoffs + table_scale * 1024);
+ fill_table(c->table_bU, bits, 1, cbu, y_table + yoffs + table_scale * 2048);
+ fill_gv_table(c->table_gV, bits, 1, cgv);
break;
case 8:
rbase = isRgb ? 5 : 0;
gbase = isRgb ? 2 : 3;
bbase = isRgb ? 0 : 6;
- c->yuvTable = av_malloc(1024 * 3);
+ c->yuvTable = av_malloc(table_scale * 1024 * 3);
y_table = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024 - 38; i++) {
+ for (i = 0; i < table_scale * 1024 - table_scale * 38; i++) {
int yval = av_clip_uint8((yb + 0x8000) >> 16);
- y_table[i + 16] = ((yval + 18) / 36) << rbase;
- y_table[i + 16 + 1024] = ((yval + 18) / 36) << gbase;
- y_table[i + 37 + 2048] = ((yval + 43) / 85) << bbase;
+ y_table[i + table_scale * 16] = ((yval + 18) / 36) << rbase;
+ y_table[i + table_scale * 16 + table_scale * 1024] = ((yval + 18) / 36) << gbase;
+ y_table[i + table_scale * 37 + table_scale * 2048] = ((yval + 43) / 85) << bbase;
yb += cy;
}
- fill_table(c->table_rV, 1, crv, y_table + yoffs);
- fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
- fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
- fill_gv_table(c->table_gV, 1, cgv);
+ fill_table(c->table_rV, bits, 1, crv, y_table + yoffs);
+ fill_table(c->table_gU, bits, 1, cgu, y_table + yoffs + table_scale * 1024);
+ fill_table(c->table_bU, bits, 1, cbu, y_table + yoffs + table_scale * 2048);
+ fill_gv_table(c->table_gV, bits, 1, cgv);
break;
case 12:
rbase = isRgb ? 8 : 0;
gbase = 4;
bbase = isRgb ? 0 : 8;
- c->yuvTable = av_malloc(1024 * 3 * 2);
+ c->yuvTable = av_malloc(table_scale * 1024 * 3 * 2);
y_table16 = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024; i++) {
+ for (i = 0; i < table_scale * 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table16[i] = (yval >> 4) << rbase;
- y_table16[i + 1024] = (yval >> 4) << gbase;
- y_table16[i + 2048] = (yval >> 4) << bbase;
+ y_table16[i + table_scale * 1024] = (yval >> 4) << gbase;
+ y_table16[i + table_scale * 2048] = (yval >> 4) << bbase;
yb += cy;
}
if (isNotNe)
- for (i = 0; i < 1024 * 3; i++)
+ for (i = 0; i < table_scale * 1024 * 3; i++)
y_table16[i] = av_bswap16(y_table16[i]);
- fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
- fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
- fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
- fill_gv_table(c->table_gV, 2, cgv);
+ fill_table(c->table_rV, bits, 2, crv, y_table16 + yoffs);
+ fill_table(c->table_gU, bits, 2, cgu, y_table16 + yoffs + table_scale * 1024);
+ fill_table(c->table_bU, bits, 2, cbu, y_table16 + yoffs + table_scale * 2048);
+ fill_gv_table(c->table_gV, bits, 2, cgv);
break;
case 15:
case 16:
rbase = isRgb ? bpp - 5 : 0;
gbase = 5;
bbase = isRgb ? 0 : (bpp - 5);
- c->yuvTable = av_malloc(1024 * 3 * 2);
+ c->yuvTable = av_malloc(table_scale * 1024 * 3 * 2);
y_table16 = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024; i++) {
+ for (i = 0; i < table_scale * 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table16[i] = (yval >> 3) << rbase;
- y_table16[i + 1024] = (yval >> (18 - bpp)) << gbase;
- y_table16[i + 2048] = (yval >> 3) << bbase;
+ y_table16[i + table_scale * 1024] = (yval >> (18 - bpp)) << gbase;
+ y_table16[i + table_scale * 2048] = (yval >> 3) << bbase;
yb += cy;
}
if (isNotNe)
- for (i = 0; i < 1024 * 3; i++)
+ for (i = 0; i < table_scale * 1024 * 3; i++)
y_table16[i] = av_bswap16(y_table16[i]);
- fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
- fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
- fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
- fill_gv_table(c->table_gV, 2, cgv);
+ fill_table(c->table_rV, bits, 2, crv, y_table16 + yoffs);
+ fill_table(c->table_gU, bits, 2, cgu, y_table16 + yoffs + table_scale * 1024);
+ fill_table(c->table_bU, bits, 2, cbu, y_table16 + yoffs + table_scale * 2048);
+ fill_gv_table(c->table_gV, bits, 2, cgv);
break;
case 24:
case 48:
- c->yuvTable = av_malloc(1024);
+ c->yuvTable = av_malloc(table_scale * 1024);
y_table = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024; i++) {
+ for (i = 0; i < table_scale * 1024; i++) {
y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
yb += cy;
}
- fill_table(c->table_rV, 1, crv, y_table + yoffs);
- fill_table(c->table_gU, 1, cgu, y_table + yoffs);
- fill_table(c->table_bU, 1, cbu, y_table + yoffs);
- fill_gv_table(c->table_gV, 1, cgv);
+ fill_table(c->table_rV, bits, 1, crv, y_table + yoffs);
+ fill_table(c->table_gU, bits, 1, cgu, y_table + yoffs);
+ fill_table(c->table_bU, bits, 1, cbu, y_table + yoffs);
+ fill_gv_table(c->table_gV, bits, 1, cgv);
break;
case 32:
case 64:
@@ -497,21 +484,21 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
if (!needAlpha)
abase = (base + 24) & 31;
- c->yuvTable = av_malloc(1024 * 3 * 4);
+ c->yuvTable = av_malloc(table_scale * 1024 * 3 * 4);
y_table32 = c->yuvTable;
yb = -(384 << 16) - oy;
- for (i = 0; i < 1024; i++) {
+ for (i = 0; i < table_scale * 1024; i++) {
unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i] = (yval << rbase) +
(needAlpha ? 0 : (255u << abase));
- y_table32[i + 1024] = yval << gbase;
- y_table32[i + 2048] = yval << bbase;
+ y_table32[i + table_scale * 1024] = yval << gbase;
+ y_table32[i + table_scale * 2048] = yval << bbase;
yb += cy;
}
- fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
- fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
- fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
- fill_gv_table(c->table_gV, 4, cgv);
+ fill_table(c->table_rV, bits, 4, crv, y_table32 + yoffs);
+ fill_table(c->table_gU, bits, 4, cgu, y_table32 + yoffs + table_scale * 1024);
+ fill_table(c->table_bU, bits, 4, cbu, y_table32 + yoffs + table_scale * 2048);
+ fill_gv_table(c->table_gV, bits, 4, cgv);
break;
default:
if(!isPlanar(c->dstFormat) || bpp <= 24)
diff --git a/libswscale/yuv2rgb_template.c b/libswscale/yuv2rgb_template.c
index e3ca8ba..0fe9cde 100644
--- a/libswscale/yuv2rgb_template.c
+++ b/libswscale/yuv2rgb_template.c
@@ -248,11 +248,11 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
#define PUTRGB16(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y + d16[0 + o]] + \
g[Y + e16[0 + o]] + \
b[Y + f16[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y + d16[1 + o]] + \
g[Y + e16[1 + o]] + \
b[Y + f16[1 + o]];
@@ -278,11 +278,11 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
#define PUTRGB15(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y + d16[0 + o]] + \
g[Y + d16[1 + o]] + \
b[Y + e16[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y + d16[1 + o]] + \
g[Y + d16[0 + o]] + \
b[Y + e16[1 + o]];
@@ -308,11 +308,11 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
const uint8_t *d16 = ff_dither_4x4_16[y & 3];
#define PUTRGB12(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y + d16[0 + o]] + \
g[Y + d16[0 + o]] + \
b[Y + d16[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y + d16[1 + o]] + \
g[Y + d16[1 + o]] + \
b[Y + d16[1 + o]];
@@ -340,11 +340,11 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
const uint8_t *d64 = ff_dither_8x8_73[y & 7];
#define PUTRGB8(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y + d32[0 + o]] + \
g[Y + d32[0 + o]] + \
b[Y + d64[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y + d32[1 + o]] + \
g[Y + d32[1 + o]] + \
b[Y + d64[1 + o]];
@@ -372,11 +372,11 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
int acc;
#define PUTRGB4D(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
acc = r[Y + d128[0 + o]] + \
g[Y + d64[0 + o]] + \
b[Y + d128[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
acc |= (r[Y + d128[1 + o]] + \
g[Y + d64[1 + o]] + \
b[Y + d128[1 + o]]) << 4; \
@@ -404,11 +404,11 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
const uint8_t *d128 = ff_dither_8x8_220[y & 7];
#define PUTRGB4DB(dst, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
dst[2 * i] = r[Y + d128[0 + o]] + \
g[Y + d64[0 + o]] + \
b[Y + d128[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y + d128[1 + o]] + \
g[Y + d64[1 + o]] + \
b[Y + d128[1 + o]];
@@ -436,9 +436,9 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
#define PUTRGB1(out, src, i, o) \
- Y = src[2 * i] >> shift; \
+ Y = src[2 * i]; \
out += out + g[Y + d128[0 + o]]; \
- Y = src[2 * i + 1] >> shift; \
+ Y = src[2 * i + 1]; \
out += out + g[Y + d128[1 + o]];
PUTRGB1(out_1, py_1, 0, 0);
--
1.8.4.2
More information about the ffmpeg-devel
mailing list