[FFmpegcvslog] avcodec/magicyuv: Avoid AV_QSORT when creating Huffman table
Andreas Rheinhardt
git at videolan.org
Sat Sep 26 22:12:35 EEST 2020
ffmpeg  branch: master  Andreas Rheinhardt <andreas.rheinhardt at gmail.com>  Wed Sep 23 15:17:33 2020 +0200 [1bf30a1beb7527887c9b466d07765d20fa3b5412]  committer: Andreas Rheinhardt
avcodec/magicyuv: Avoid AV_QSORT when creating Huffman table
The MagicYUV format stores Huffman tables in its bitstream by coding
the length of a given symbol; it does not code the actual code directly,
instead this is to be inferred by the rule that a symbol is to the left
of every shorter symbol in the Huffman tree and that for symbols of the
same length the symbol is ascending from left to right.
Our decoder implemented this by first sorting the array containing
length and symbol of each element according to descending length and
for equal length, according to ascending symbol. Afterwards, the current
state in the tree got encoded in a variable code; if the next array entry
had length len, then the len most significant bits of code contained
the code of this entry. Whenever an entry of the array of length
len was processed, code was incremented by 1U << (32  len). So two
entries of length len have the same effect as incrementing code by
1U << (32  (len  1)), which corresponds to the parent node of length
len  1 of the two nodes of length len etc.
This commit modifies this to avoid sorting the entries before
calculating the codes. This is done by calculating how many nonleaf
nodes there are on each level of the tree before calculating the codes.
Afterwards every leaf node on this level gets assigned the number of
nodes already on this level as code. This of course works only because
the entries are already sorted by their symbol initially, so that this
algorithm indeed gives ascending symbols from left to right on every
level.
This offers both speed as well as (obvious) codesize advantages. With
Clang 10 the number of decicycles for build_huffman decreased from
1561987 to 1228405; for GCC 9 it went from 1825096 decicyles to 1429921.
These tests were carried out with a sample with 150 frames that was
looped 13 times; and this was iterated 10 times. The earlier reference
point here is from the point when the loop generating the codes was
traversed in reverse order (as the patch reversing the order led to
performance penalties).
Reviewedby: Paul B Mahol <onemda at gmail.com>
Signedoffby: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1bf30a1beb7527887c9b466d07765d20fa3b5412

libavcodec/magicyuv.c  36 ++++++++++++++++++
1 file changed, 18 insertions(+), 18 deletions()
diff git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
index 17dea69d76..7dded9b457 100644
 a/libavcodec/magicyuv.c
+++ b/libavcodec/magicyuv.c
@@ 25,7 +25,6 @@
#define CACHED_BITSTREAM_READER !ARCH_X86_32
#include "libavutil/pixdesc.h"
#include "libavutil/qsort.h"
#include "avcodec.h"
#include "bytestream.h"
@@ 74,26 +73,24 @@ typedef struct MagicYUVContext {
LLVidDSPContext llviddsp;
} MagicYUVContext;
static int huff_cmp_len(const void *a, const void *b)
+static int huff_build(HuffEntry he[], uint16_t codes_count[33],
+ VLC *vlc, int nb_elems)
{
 const HuffEntry *aa = a, *bb = b;
 return (bb>len  aa>len) * 4096 + aa>sym  bb>sym;
}

static int huff_build(HuffEntry he[], VLC *vlc, int nb_elems)
{
 uint32_t code;

 AV_QSORT(he, nb_elems, HuffEntry, huff_cmp_len);
+ unsigned nb_codes = 0, max = 0;
+
+ for (int i = 32; i > 0; i) {
+ uint16_t curr = codes_count[i]; // # of leafs of length i
+ codes_count[i] = nb_codes / 2; // # of nonleaf nodes on level i
+ nb_codes = codes_count[i] + curr; // # of nodes on level i
+ if (curr && !max)
+ max = i;
+ }
 code = 0;
for (unsigned i = 0; i < nb_elems; i++) {
 he[i].code = code >> (32  he[i].len);
 code += 0x80000000u >> (he[i].len  1);
+ he[i].code = codes_count[he[i].len];
+ codes_count[he[i].len]++;
}

 ff_free_vlc(vlc);
 return ff_init_vlc_sparse(vlc, FFMIN(he[0].len, 12), nb_elems,
+ return ff_init_vlc_sparse(vlc, FFMIN(max, 12), nb_elems,
&he[0].len, sizeof(he[0]), sizeof(he[0].len),
&he[0].code, sizeof(he[0]), sizeof(he[0].code),
&he[0].sym, sizeof(he[0]), sizeof(he[0].sym), 0);
@@ 389,6 +386,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
MagicYUVContext *s = avctx>priv_data;
GetByteContext gb;
HuffEntry he[4096];
+ uint16_t length_count[33] = { 0 };
int i = 0, j = 0, k;
bytestream2_init(&gb, table, table_size);
@@ 409,6 +407,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
return AVERROR_INVALIDDATA;
}
+ length_count[x] += l;
for (; j < k; j++) {
he[j].sym = j;
he[j].len = x;
@@ 416,7 +415,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
if (j == max) {
j = 0;
 if (huff_build(he, &s>vlc[i], max)) {
+ if (huff_build(he, length_count, &s>vlc[i], max)) {
av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
return AVERROR_INVALIDDATA;
}
@@ 424,6 +423,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
if (i == s>planes) {
break;
}
+ memset(length_count, 0, sizeof(length_count));
}
}
More information about the ffmpegcvslog
mailing list