[FFmpeg-devel] [PATCH] avutil/intmath: use de Bruijn based ff_ctz
Ganesh Ajjanagadde
gajjanagadde at gmail.com
Mon Oct 12 03:55:36 CEST 2015
It has already been demonstrated that the de Bruijn method has benefits
over the current implementation: commit 971d12b7f9d7be3ca8eb98e6c04ed521f83cbd3c.
That commit implemented it for long long, this extends it to the 32 bit
version.
The function is renamed from ff_ctz to ff_ctz32 since it crucially
depends on the 32 bit width of its argument. This is not an issue, as the
only usage in avcodec/flacenc uses an int32_t anyway.
Tested with FATE.
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
---
libavcodec/flacenc.c | 2 +-
libavutil/intmath.h | 44 +++++++++++++-------------------------------
2 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index e87fdc1..cb234dc 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -1065,7 +1065,7 @@ static void remove_wasted_bits(FlacEncodeContext *s)
}
if (v && !(v & 1)) {
- v = ff_ctz(v);
+ v = ff_ctz32(v);
for (i = 0; i < s->frame.blocksize; i++)
sub->samples[i] >>= v;
diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index 802abe3..751ac7a 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -107,8 +107,8 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
#if HAVE_FAST_CLZ
#if defined( __INTEL_COMPILER )
-#ifndef ff_ctz
-#define ff_ctz(v) _bit_scan_forward(v)
+#ifndef ff_ctz32
+#define ff_ctz32(v) _bit_scan_forward(v)
#endif
#elif AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_ctz
@@ -120,8 +120,8 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
#endif
#endif
-#ifndef ff_ctz
-#define ff_ctz ff_ctz_c
+#ifndef ff_ctz32
+#define ff_ctz32 ff_ctz32_c
/**
* Trailing zero bit count.
*
@@ -129,36 +129,18 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
* @return the number of trailing 0-bits
*/
#if !defined( _MSC_VER )
-static av_always_inline av_const int ff_ctz_c(int v)
+/* We use the De-Bruijn method outlined in:
+ * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
+static av_always_inline av_const int ff_ctz32_c(int32_t v)
{
- int c;
-
- if (v & 0x1)
- return 0;
-
- c = 1;
- if (!(v & 0xffff)) {
- v >>= 16;
- c += 16;
- }
- if (!(v & 0xff)) {
- v >>= 8;
- c += 8;
- }
- if (!(v & 0xf)) {
- v >>= 4;
- c += 4;
- }
- if (!(v & 0x3)) {
- v >>= 2;
- c += 2;
- }
- c -= v & 0x1;
-
- return c;
+ static const uint8_t debruijn_ctz32[32] = {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+ return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
}
#else
-static av_always_inline av_const int ff_ctz_c( int v )
+static av_always_inline av_const int ff_ctz32_c( int v )
{
unsigned long c;
_BitScanForward(&c, v);
--
2.6.1
More information about the ffmpeg-devel
mailing list