[FFmpeg-devel] [PATCH] all: replace qsort with AV_QSORT
Ganesh Ajjanagadde
gajjanag at mit.edu
Sun Oct 18 17:05:24 CEST 2015
On Sun, Oct 18, 2015 at 10:57 AM, Hendrik Leppkes <h.leppkes at gmail.com> wrote:
> On Sun, Oct 18, 2015 at 4:47 PM, Ganesh Ajjanagadde
> <gajjanagadde at gmail.com> wrote:
>> Commit e11e32686fdb21aded1ccf70202f1fffe87bb6a2 explains why replacing
>> qsort with AV_QSORT yields performance improvements.
>>
>> This replaces all existing uses of libc's qsort with AV_QSORT.
>>
>> Benchmarks deemed unnecessary due to existing claims about AV_QSORT.
>> Tested with FATE.
>
> Claims are nice, data is better. Some testing when doing performance
> improvements shouldn't be too much to ask. :)
Just look at the asm, all cmp callbacks are inlined, removing function
call overhead. I personally don't care beyond that.
>
>>
>> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
>> ---
>> cmdutils.c | 3 ++-
>> cmdutils_opencl.c | 3 ++-
>> ffmpeg.c | 3 ++-
>> libavcodec/aacsbr_template.c | 14 ++++++++------
>> libavcodec/huffman.c | 3 ++-
>> libavcodec/motion_est_template.c | 3 ++-
>> libavcodec/utvideodec.c | 4 ++--
>> libavcodec/utvideoenc.c | 5 +++--
>> libavfilter/f_sendcmd.c | 3 ++-
>> libavfilter/vf_deshake.c | 3 ++-
>> libavfilter/vf_palettegen.c | 2 +-
>> libavfilter/vf_paletteuse.c | 2 +-
>> libavfilter/vf_removegrain.c | 7 ++++---
>> libavformat/subtitles.c | 10 +++++++---
>> libswresample/swresample-test.c | 3 ++-
>> tests/checkasm/checkasm.c | 4 ++--
>> 16 files changed, 44 insertions(+), 28 deletions(-)
>>
>> diff --git a/cmdutils.c b/cmdutils.c
>> index 9cdc801..c7e515e 100644
>> --- a/cmdutils.c
>> +++ b/cmdutils.c
>> @@ -47,6 +47,7 @@
>> #include "libavutil/libm.h"
>> #include "libavutil/parseutils.h"
>> #include "libavutil/pixdesc.h"
>> +#include "libavutil/qsort.h"
>> #include "libavutil/eval.h"
>> #include "libavutil/dict.h"
>> #include "libavutil/opt.h"
>> @@ -1441,7 +1442,7 @@ static unsigned get_codecs_sorted(const AVCodecDescriptor ***rcodecs)
>> while ((desc = avcodec_descriptor_next(desc)))
>> codecs[i++] = desc;
>> av_assert0(i == nb_codecs);
>> - qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc);
>> + AV_QSORT(codecs, nb_codecs, AVCodecDescriptor *, compare_codec_desc);
>> *rcodecs = codecs;
>> return nb_codecs;
>> }
>> diff --git a/cmdutils_opencl.c b/cmdutils_opencl.c
>> index 61478e2..f6a4ddf 100644
>> --- a/cmdutils_opencl.c
>> +++ b/cmdutils_opencl.c
>> @@ -22,6 +22,7 @@
>> #include "libavutil/time.h"
>> #include "libavutil/log.h"
>> #include "libavutil/opencl.h"
>> +#include "libavutil/qsort.h"
>> #include "libavutil/avstring.h"
>> #include "cmdutils.h"
>>
>> @@ -245,7 +246,7 @@ int opt_opencl_bench(void *optctx, const char *opt, const char *arg)
>> }
>> }
>> }
>> - qsort(devices, count, sizeof(OpenCLDeviceBenchmark), compare_ocl_device_desc);
>> + AV_QSORT(devices, count, OpenCLDeviceBenchmark, compare_ocl_device_desc);
>> fprintf(stderr, "platform_idx\tdevice_idx\tdevice_name\truntime\n");
>> for (i = 0; i < count; i++)
>> fprintf(stdout, "%d\t%d\t%s\t%"PRId64"\n",
>> diff --git a/ffmpeg.c b/ffmpeg.c
>> index 36a68fb..fa6132b 100644
>> --- a/ffmpeg.c
>> +++ b/ffmpeg.c
>> @@ -43,6 +43,7 @@
>> #include "libavdevice/avdevice.h"
>> #include "libswresample/swresample.h"
>> #include "libavutil/opt.h"
>> +#include "libavutil/qsort.h"
>> #include "libavutil/channel_layout.h"
>> #include "libavutil/parseutils.h"
>> #include "libavutil/samplefmt.h"
>> @@ -2705,7 +2706,7 @@ static void parse_forced_key_frames(char *kf, OutputStream *ost,
>> }
>>
>> av_assert0(index == size);
>> - qsort(pts, size, sizeof(*pts), compare_int64);
>> + AV_QSORT(pts, size, int64_t, compare_int64);
>> ost->forced_kf_count = size;
>> ost->forced_kf_pts = pts;
>> }
>> diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
>> index d31b71e..9561ba0 100644
>> --- a/libavcodec/aacsbr_template.c
>> +++ b/libavcodec/aacsbr_template.c
>> @@ -32,6 +32,8 @@
>> * @author Zoran Basaric ( zoran.basaric at imgtec.com )
>> */
>>
>> +#include "libavutil/qsort.h"
>> +
>> av_cold void AAC_RENAME(ff_aac_sbr_init)(void)
>> {
>> static const struct {
>> @@ -138,8 +140,8 @@ static void sbr_make_f_tablelim(SpectralBandReplication *sbr)
>> memcpy(sbr->f_tablelim + sbr->n[0] + 1, patch_borders + 1,
>> (sbr->num_patches - 1) * sizeof(patch_borders[0]));
>>
>> - qsort(sbr->f_tablelim, sbr->num_patches + sbr->n[0],
>> - sizeof(sbr->f_tablelim[0]),
>> + AV_QSORT(sbr->f_tablelim, sbr->num_patches + sbr->n[0],
>> + uint16_t,
>> qsort_comparison_function_int16);
>>
>> sbr->n_lim = sbr->n[0] + sbr->num_patches - 1;
>> @@ -296,7 +298,7 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
>> if (spectrum->bs_stop_freq < 14) {
>> sbr->k[2] = stop_min;
>> make_bands(stop_dk, stop_min, 64, 13);
>> - qsort(stop_dk, 13, sizeof(stop_dk[0]), qsort_comparison_function_int16);
>> + AV_QSORT(stop_dk, 13, int16_t, qsort_comparison_function_int16);
>> for (k = 0; k < spectrum->bs_stop_freq; k++)
>> sbr->k[2] += stop_dk[k];
>> } else if (spectrum->bs_stop_freq == 14) {
>> @@ -389,7 +391,7 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
>>
>> make_bands(vk0+1, sbr->k[0], sbr->k[1], num_bands_0);
>>
>> - qsort(vk0 + 1, num_bands_0, sizeof(vk0[1]), qsort_comparison_function_int16);
>> + AV_QSORT(vk0 + 1, num_bands_0, int16_t, qsort_comparison_function_int16);
>> vdk0_max = vk0[num_bands_0];
>>
>> vk0[0] = sbr->k[0];
>> @@ -430,13 +432,13 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
>>
>> if (vdk1_min < vdk0_max) {
>> int change;
>> - qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);
>> + AV_QSORT(vk1 + 1, num_bands_1, int16_t, qsort_comparison_function_int16);
>> change = FFMIN(vdk0_max - vk1[1], (vk1[num_bands_1] - vk1[1]) >> 1);
>> vk1[1] += change;
>> vk1[num_bands_1] -= change;
>> }
>>
>> - qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);
>> + AV_QSORT(vk1 + 1, num_bands_1, int16_t, qsort_comparison_function_int16);
>>
>> vk1[0] = sbr->k[1];
>> for (k = 1; k <= num_bands_1; k++) {
>> diff --git a/libavcodec/huffman.c b/libavcodec/huffman.c
>> index c771bcf..d7403b8 100644
>> --- a/libavcodec/huffman.c
>> +++ b/libavcodec/huffman.c
>> @@ -26,6 +26,7 @@
>>
>> #include <stdint.h>
>>
>> +#include "libavutil/qsort.h"
>> #include "avcodec.h"
>> #include "get_bits.h"
>> #include "huffman.h"
>> @@ -170,7 +171,7 @@ int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bit
>> "Tree construction is not possible\n");
>> return -1;
>> }
>> - qsort(nodes, nb_codes, sizeof(Node), cmp);
>> + AV_QSORT(nodes, nb_codes, Node, cmp);
>> cur_node = nb_codes;
>> nodes[nb_codes*2-1].count = 0;
>> for (i = 0; i < nb_codes * 2 - 1; i += 2) {
>> diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
>> index 37ff110..327a24b 100644
>> --- a/libavcodec/motion_est_template.c
>> +++ b/libavcodec/motion_est_template.c
>> @@ -24,6 +24,7 @@
>> * Motion estimation template.
>> */
>>
>> +#include "libavutil/qsort.h"
>> #include "mpegvideo.h"
>>
>> //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
>> @@ -723,7 +724,7 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
>> j++;
>> }
>>
>> - qsort(minima, j, sizeof(Minima), minima_cmp);
>> + AV_QSORT(minima, j, Minima, minima_cmp);
>>
>> for(; j<minima_count; j++){
>> minima[j].height=256*256*256*64;
>> diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
>> index 760d9e5..5efc008 100644
>> --- a/libavcodec/utvideodec.c
>> +++ b/libavcodec/utvideodec.c
>> @@ -25,9 +25,9 @@
>> */
>>
>> #include <inttypes.h>
>> -#include <stdlib.h>
>>
>> #include "libavutil/intreadwrite.h"
>> +#include "libavutil/qsort.h"
>> #include "avcodec.h"
>> #include "bswapdsp.h"
>> #include "bytestream.h"
>> @@ -50,7 +50,7 @@ static int build_huff(const uint8_t *src, VLC *vlc, int *fsym)
>> he[i].sym = i;
>> he[i].len = *src++;
>> }
>> - qsort(he, 256, sizeof(*he), ff_ut_huff_cmp_len);
>> + AV_QSORT(he, 256, HuffEntry, ff_ut_huff_cmp_len);
>>
>> if (!he[0].len) {
>> *fsym = he[0].sym;
>> diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
>> index b8e1cc3..dc7d019 100644
>> --- a/libavcodec/utvideoenc.c
>> +++ b/libavcodec/utvideoenc.c
>> @@ -26,6 +26,7 @@
>>
>> #include "libavutil/imgutils.h"
>> #include "libavutil/intreadwrite.h"
>> +#include "libavutil/qsort.h"
>> #include "avcodec.h"
>> #include "internal.h"
>> #include "bswapdsp.h"
>> @@ -331,7 +332,7 @@ static void calculate_codes(HuffEntry *he)
>> int last, i;
>> uint32_t code;
>>
>> - qsort(he, 256, sizeof(*he), ff_ut_huff_cmp_len);
>> + AV_QSORT(he, 256, HuffEntry, ff_ut_huff_cmp_len);
>>
>> last = 255;
>> while (he[last].len == 255 && last)
>> @@ -343,7 +344,7 @@ static void calculate_codes(HuffEntry *he)
>> code += 0x80000000u >> (he[i].len - 1);
>> }
>>
>> - qsort(he, 256, sizeof(*he), huff_cmp_sym);
>> + AV_QSORT(he, 256, HuffEntry, huff_cmp_sym);
>> }
>>
>> /* Write huffman bit codes to a memory block */
>> diff --git a/libavfilter/f_sendcmd.c b/libavfilter/f_sendcmd.c
>> index 37aedc5..a480684 100644
>> --- a/libavfilter/f_sendcmd.c
>> +++ b/libavfilter/f_sendcmd.c
>> @@ -28,6 +28,7 @@
>> #include "libavutil/file.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/parseutils.h"
>> +#include "libavutil/qsort.h"
>> #include "avfilter.h"
>> #include "internal.h"
>> #include "avfiltergraph.h"
>> @@ -411,7 +412,7 @@ static av_cold int init(AVFilterContext *ctx)
>> return AVERROR(EINVAL);
>> }
>>
>> - qsort(s->intervals, s->nb_intervals, sizeof(Interval), cmp_intervals);
>> + AV_QSORT(s->intervals, s->nb_intervals, Interval, cmp_intervals);
>>
>> av_log(ctx, AV_LOG_DEBUG, "Parsed commands:\n");
>> for (i = 0; i < s->nb_intervals; i++) {
>> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
>> index ac13ecd..4252631 100644
>> --- a/libavfilter/vf_deshake.c
>> +++ b/libavfilter/vf_deshake.c
>> @@ -57,6 +57,7 @@
>> #include "libavutil/mem.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/pixdesc.h"
>> +#include "libavutil/qsort.h"
>>
>> #include "deshake.h"
>> #include "deshake_opencl.h"
>> @@ -105,7 +106,7 @@ static double clean_mean(double *values, int count)
>> int cut = count / 5;
>> int x;
>>
>> - qsort(values, count, sizeof(double), (void*)cmp);
>> + AV_QSORT(values, count, double, cmp);
>>
>> for (x = cut; x < count - cut; x++) {
>> mean += values[x];
>> diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
>> index df57c10..243df2d 100644
>> --- a/libavfilter/vf_palettegen.c
>> +++ b/libavfilter/vf_palettegen.c
>> @@ -382,7 +382,7 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx)
>> av_log(ctx, AV_LOG_INFO, "%d%s colors generated out of %d colors; ratio=%f\n",
>> s->nb_boxes, s->reserve_transparent ? "(+1)" : "", s->nb_refs, ratio);
>>
>> - qsort(s->boxes, s->nb_boxes, sizeof(*s->boxes), cmp_color);
>> + AV_QSORT(s->boxes, s->nb_boxes, struct range_box, cmp_color);
>>
>> write_palette(ctx, out);
>>
>> diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
>> index 1225a66..890bb46 100644
>> --- a/libavfilter/vf_paletteuse.c
>> +++ b/libavfilter/vf_paletteuse.c
>> @@ -704,7 +704,7 @@ static void load_colormap(PaletteUseContext *s)
>> struct color_rect box;
>>
>> /* disable transparent colors and dups */
>> - qsort(s->palette, AVPALETTE_COUNT, sizeof(*s->palette), cmp_pal_entry);
>> + AV_QSORT(s->palette, AVPALETTE_COUNT, uint32_t, cmp_pal_entry);
>> for (i = 0; i < AVPALETTE_COUNT; i++) {
>> const uint32_t c = s->palette[i];
>> if (i != 0 && c == last_color) {
>> diff --git a/libavfilter/vf_removegrain.c b/libavfilter/vf_removegrain.c
>> index da17f6a..3a28b15 100644
>> --- a/libavfilter/vf_removegrain.c
>> +++ b/libavfilter/vf_removegrain.c
>> @@ -24,6 +24,7 @@
>> #include "libavutil/imgutils.h"
>> #include "libavutil/opt.h"
>> #include "libavutil/pixdesc.h"
>> +#include "libavutil/qsort.h"
>> #include "avfilter.h"
>> #include "formats.h"
>> #include "internal.h"
>> @@ -92,7 +93,7 @@ static int mode02(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
>> {
>> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
>>
>> - qsort(&a, 8, sizeof(a[0]), cmp_int);
>> + AV_QSORT(a, 8, int, cmp_int);
>>
>> return av_clip(c, a[2 - 1 ], a[7 - 1]);
>> }
>> @@ -101,7 +102,7 @@ static int mode03(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
>> {
>> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
>>
>> - qsort(&a, 8, sizeof(a[0]), cmp_int);
>> + AV_QSORT(a, 8, int, cmp_int);
>>
>> return av_clip(c, a[3 - 1 ], a[6 - 1]);
>> }
>> @@ -110,7 +111,7 @@ static int mode04(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
>> {
>> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
>>
>> - qsort(&a, 8, sizeof(a[0]), cmp_int);
>> + AV_QSORT(a, 8, int, cmp_int);
>>
>> return av_clip(c, a[4 - 1 ], a[5 - 1]);
>> }
>> diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
>> index bb89766..16ab245 100644
>> --- a/libavformat/subtitles.c
>> +++ b/libavformat/subtitles.c
>> @@ -23,6 +23,7 @@
>> #include "avio_internal.h"
>> #include "libavutil/avassert.h"
>> #include "libavutil/avstring.h"
>> +#include "libavutil/qsort.h"
>>
>> void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
>> {
>> @@ -197,9 +198,12 @@ void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
>> {
>> int i;
>>
>> - qsort(q->subs, q->nb_subs, sizeof(*q->subs),
>> - q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
>> - : cmp_pkt_sub_pos_ts);
>> + if (q->sort == SUB_SORT_TS_POS) {
>> + AV_QSORT(q->subs, q->nb_subs, AVPacket, cmp_pkt_sub_ts_pos);
>> + }
>> + else
>> + AV_QSORT(q->subs, q->nb_subs, AVPacket, cmp_pkt_sub_pos_ts);
>> +
>> for (i = 0; i < q->nb_subs; i++)
>> if (q->subs[i].duration == -1 && i < q->nb_subs - 1)
>> q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;
>> diff --git a/libswresample/swresample-test.c b/libswresample/swresample-test.c
>> index 9caa750..351ec24 100644
>> --- a/libswresample/swresample-test.c
>> +++ b/libswresample/swresample-test.c
>> @@ -23,6 +23,7 @@
>> #include "libavutil/channel_layout.h"
>> #include "libavutil/common.h"
>> #include "libavutil/opt.h"
>> +#include "libavutil/qsort.h"
>> #include "swresample.h"
>>
>> #undef time
>> @@ -271,7 +272,7 @@ int main(int argc, char **argv){
>> r = (seed * (uint64_t)(max_tests - test)) >>32;
>> FFSWAP(int, remaining_tests[r], remaining_tests[max_tests - test - 1]);
>> }
>> - qsort(remaining_tests + max_tests - num_tests, num_tests, sizeof(remaining_tests[0]), (void*)cmp);
>> + AV_QSORT(remaining_tests + max_tests - num_tests, num_tests, int, cmp);
>> in_sample_rate=16000;
>> for(test=0; test<num_tests; test++){
>> char in_layout_string[256];
>> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
>> index bba2dbc..7edaa31 100644
>> --- a/tests/checkasm/checkasm.c
>> +++ b/tests/checkasm/checkasm.c
>> @@ -22,12 +22,12 @@
>>
>> #include <stdarg.h>
>> #include <stdio.h>
>> -#include <stdlib.h>
>> #include <string.h>
>> #include "checkasm.h"
>> #include "libavutil/common.h"
>> #include "libavutil/cpu.h"
>> #include "libavutil/random_seed.h"
>> +#include "libavutil/qsort.h"
>>
>> #if HAVE_IO_H
>> #include <io.h>
>> @@ -262,7 +262,7 @@ static int measure_nop_time(void)
>> nops[i] = AV_READ_TIME() - t;
>> }
>>
>> - qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
>> + AV_QSORT(nops, 10000, uint16_t, cmp_nop);
>> for (i = 2500; i < 7500; i++)
>> nop_sum += nops[i];
>>
>> --
>> 2.6.1
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel
mailing list