[FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8
Mateusz
mateuszb at poczta.onet.pl
Fri Sep 22 03:10:01 EEST 2017
To reduce bit depth in planar YUV or gray pixel formats, ffmpeg uses the DITHER_COPY macro.
Currently this macro makes images greener and leaves a visible dither pattern.
In my opinion there is no point in using dither tables for destination bit depth >= 9;
we can use a simple down-shift, which is neutral in both full and limited range -- the resulting
images have the same brightness and the same colors.
For destination bit depth == 8 we could use a new, bit-exact, precise DITHER_COPY macro
(which is slower).
If speed is the only concern, I've attached a second patch using Intel intrinsics for x86_64
that makes the code faster (I don't see any Intel intrinsics in ffmpeg, so it's probably for testing only).
Please review.
Mateusz
-------------- next part --------------
From a52417a3817ac774eb364bbef20c954a3d278d45 Mon Sep 17 00:00:00 2001
From: Mateusz <mateuszb at poczta.onet.pl>
Date: Fri, 22 Sep 2017 01:22:59 +0200
Subject: [PATCH] swscale_unscaled: fix and speed up DITHER_COPY macro for
x86_64, use it only for dst_depth == 8
---
libswscale/swscale_unscaled.c | 185 ++++++++++++++++++++++++++++++++++--------
1 file changed, 150 insertions(+), 35 deletions(-)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..7d1cbed 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -35,6 +35,10 @@
#include "libavutil/avassert.h"
#include "libavutil/avconfig.h"
+#if ARCH_X86_64
+#include <emmintrin.h>
+#endif
+
DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
{
{ 0, 1, 0, 1, 0, 1, 0, 1,},
@@ -110,24 +114,6 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
{ 112, 16,104, 8,118, 22,110, 14,},
}};
-static const uint16_t dither_scale[15][16]={
-{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
-{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
-{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
-{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
-{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
-{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
-{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
-{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
-{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
-{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
-{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
-{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
-{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
-{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
-{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
-};
-
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
@@ -1502,22 +1488,127 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
}
#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
- uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
- int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+ unsigned shift= src_depth-dst_depth, tmp;\
+ if (shiftonly) {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ for (j = 0; j < length-7; j+=8) {\
+ tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ for (; j < length; j++) {\
+ tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ } else {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ for (j = 0; j < length-7; j+=8) {\
+ tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\
+ tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\
+ tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\
+ tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\
+ tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\
+ tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\
+ tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\
+ tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\
+ }\
+ for (; j < length; j++) {\
+ tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ }
+
+#define SHIFT_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+ unsigned shift= src_depth-dst_depth;\
+ for (i = 0; i < height; i++) {\
+ for (j = 0; j < length-7; j+=8) {\
+ dst[j+0] = dbswap(bswap(src[j+0])>>shift);\
+ dst[j+1] = dbswap(bswap(src[j+1])>>shift);\
+ dst[j+2] = dbswap(bswap(src[j+2])>>shift);\
+ dst[j+3] = dbswap(bswap(src[j+3])>>shift);\
+ dst[j+4] = dbswap(bswap(src[j+4])>>shift);\
+ dst[j+5] = dbswap(bswap(src[j+5])>>shift);\
+ dst[j+6] = dbswap(bswap(src[j+6])>>shift);\
+ dst[j+7] = dbswap(bswap(src[j+7])>>shift);\
+ }\
+ for (; j < length; j++)\
+ dst[j] = dbswap(bswap(src[j])>>shift);\
+ dst += dstStride;\
+ src += srcStride;\
+ }
+
+#define MM_BSWAP16(n) _mm_or_si128(_mm_srli_epi16(n, 8), _mm_slli_epi16(n, 8))
+
+// Only for dst_depth == 8
+#define DITHER_COPY_X64(dst, dstStride, src, srcStride, bswap, mbswap)\
+ unsigned shift= src_depth-8, tmp;\
+ __m128i A0, D0;\
+ if (shiftonly) {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ D0 = _mm_loadl_epi64((__m128i const*)dither);\
+ D0 = _mm_unpacklo_epi8(D0, _mm_setzero_si128());\
+ for (j = 0; j < length-7; j+=8) {\
+ A0 = _mm_loadu_si128((__m128i const*)(src + j));\
+ A0 = mbswap(A0);\
+ A0 = _mm_adds_epu16(A0, D0);\
+ A0 = _mm_srli_epi16(A0, shift);\
+ A0 = _mm_packus_epi16(A0, A0);\
+ _mm_storel_epi64((__m128i*)(dst + j), A0);\
+ }\
+ for (; j < length; j++) {\
+ tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = tmp - (tmp>>8);\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ } else {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ D0 = _mm_loadl_epi64((__m128i const*)dither);\
+ D0 = _mm_unpacklo_epi8(D0, _mm_setzero_si128());\
+ for (j = 0; j < length-7; j+=8) {\
+ A0 = _mm_loadu_si128((__m128i const*)(src + j));\
+ A0 = mbswap(A0);\
+ A0 = _mm_sub_epi16(A0, _mm_srli_epi16(A0, 8));\
+ A0 = _mm_add_epi16(A0, D0);\
+ A0 = _mm_srli_epi16(A0, shift);\
+ A0 = _mm_packus_epi16(A0, A0);\
+ _mm_storel_epi64((__m128i*)(dst + j), A0);\
+ }\
+ for (; j < length; j++) {\
+ tmp = bswap(src[j]); dst[j] = (tmp - (tmp>>8) + dither[j&7])>>shift;\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ }
+
+// Only for dst_depth > 8
+#define SHIFT_COPY_X64(dst, dstStride, src, srcStride, bswap, dbswap, mbswap, mdbswap)\
+ unsigned shift= src_depth-dst_depth;\
+ __m128i A0;\
for (i = 0; i < height; i++) {\
- const uint8_t *dither= dithers[src_depth-9][i&7];\
- for (j = 0; j < length-7; j+=8){\
- dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
- dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
- dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
- dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
- dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
- dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
- dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
- dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+ for (j = 0; j < length-7; j+=8) {\
+ A0 = _mm_loadu_si128((__m128i const*)(src + j));\
+ A0 = mbswap(A0);\
+ A0 = _mm_srli_epi16(A0, shift);\
+ A0 = mdbswap(A0);\
+ _mm_storeu_si128((__m128i*)(dst + j), A0);\
}\
for (; j < length; j++)\
- dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
+ dst[j] = dbswap(bswap(src[j])>>shift);\
dst += dstStride;\
src += srcStride;\
}
@@ -1561,9 +1652,17 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
if (dst_depth == 8) {
if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+#if ARCH_X86_64
+ DITHER_COPY_X64(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+#else
DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+#endif
} else {
+#if ARCH_X86_64
+ DITHER_COPY_X64(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, MM_BSWAP16)
+#else
DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
+#endif
}
} else if (src_depth == 8) {
for (i = 0; i < height; i++) {
@@ -1642,15 +1741,31 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
} else {
if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
+#if ARCH_X86_64
+ SHIFT_COPY_X64(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , , , )
+#else
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
+#endif
} else {
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
+#if ARCH_X86_64
+ SHIFT_COPY_X64(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16, , MM_BSWAP16)
+#else
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
+#endif
}
}else{
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
+#if ARCH_X86_64
+ SHIFT_COPY_X64(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, , MM_BSWAP16, )
+#else
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
+#endif
} else {
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
+#if ARCH_X86_64
+ SHIFT_COPY_X64(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16, MM_BSWAP16, MM_BSWAP16)
+#else
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
+#endif
}
}
}
--
2.7.4
-------------- next part --------------
From 1f1df5085fa9e15aa183d109d72829b6e260fced Mon Sep 17 00:00:00 2001
From: Mateusz <mateuszb at poczta.onet.pl>
Date: Fri, 22 Sep 2017 01:15:32 +0200
Subject: [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for
dst_depth == 8
---
libswscale/swscale_unscaled.c | 92 +++++++++++++++++++++++++++----------------
1 file changed, 57 insertions(+), 35 deletions(-)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..b1d5eb9 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
{ 112, 16,104, 8,118, 22,110, 14,},
}};
-static const uint16_t dither_scale[15][16]={
-{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
-{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
-{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
-{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
-{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
-{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
-{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
-{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
-{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
-{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
-{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
-{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
-{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
-{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
-{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
-};
-
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
@@ -1502,22 +1484,62 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
}
#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
- uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
- int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+ unsigned shift= src_depth-dst_depth, tmp;\
+ if (shiftonly) {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ for (j = 0; j < length-7; j+=8) {\
+ tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
+ tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ for (; j < length; j++) {\
+ tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ } else {\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[shift-1][i&7];\
+ for (j = 0; j < length-7; j+=8) {\
+ tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\
+ tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\
+ tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\
+ tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\
+ tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\
+ tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\
+ tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\
+ tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\
+ }\
+ for (; j < length; j++) {\
+ tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\
+ }\
+ dst += dstStride;\
+ src += srcStride;\
+ }\
+ }
+
+#define SHIFT_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+ unsigned shift= src_depth-dst_depth;\
for (i = 0; i < height; i++) {\
- const uint8_t *dither= dithers[src_depth-9][i&7];\
- for (j = 0; j < length-7; j+=8){\
- dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
- dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
- dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
- dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
- dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
- dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
- dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
- dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+ for (j = 0; j < length-7; j+=8) {\
+ dst[j+0] = dbswap(bswap(src[j+0])>>shift);\
+ dst[j+1] = dbswap(bswap(src[j+1])>>shift);\
+ dst[j+2] = dbswap(bswap(src[j+2])>>shift);\
+ dst[j+3] = dbswap(bswap(src[j+3])>>shift);\
+ dst[j+4] = dbswap(bswap(src[j+4])>>shift);\
+ dst[j+5] = dbswap(bswap(src[j+5])>>shift);\
+ dst[j+6] = dbswap(bswap(src[j+6])>>shift);\
+ dst[j+7] = dbswap(bswap(src[j+7])>>shift);\
}\
for (; j < length; j++)\
- dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
+ dst[j] = dbswap(bswap(src[j])>>shift);\
dst += dstStride;\
src += srcStride;\
}
@@ -1642,15 +1664,15 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
} else {
if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
} else {
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
}
}else{
if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
} else {
- DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
+ SHIFT_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
}
}
}
--
2.7.4
More information about the ffmpeg-devel
mailing list