[FFmpeg-devel] [PATCH] wmadec.c: SIMD optimization using float_to_int16_interleave

Mon Mar 8 17:23:55 CET 2010

"Zhou Zongyi"<zhouzy at os.pku.edu.cn> writes:

> Hi all,
>
> Here is my patch. I tested decoding a 160kbps 44.1kHz sample on a
> K8, around 10% faster in overall speed.
>
> Index: libavcodec/wmadec.c
> ===================================================================
> --- libavcodec/wmadec.c (revision 22281)
> +++ libavcodec/wmadec.c (working copy)
> @@ -769,8 +769,6 @@
>  static int wma_decode_frame(WMACodecContext *s, int16_t *samples)
>  {
>      int ret, i, n, ch, incr;
> -    int16_t *ptr;
> -    float *iptr;
>
>  #ifdef TRACE
>      tprintf(s->avctx, "***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len);
> @@ -790,17 +788,29 @@
>      /* convert frame to integer */
>      n = s->frame_len;
>      incr = s->nb_channels;
> -    for(ch = 0; ch < s->nb_channels; ch++) {
> -        ptr = samples + ch;
> -        iptr = s->frame_out[ch];
> +    if (s->dsp.float_to_int16_interleave == ff_float_to_int16_interleave_c) {
> +        for(ch = 0; ch < incr; ch++) {
> +            int16_t *ptr = samples + ch;
> +            float *iptr = s->frame_out[ch];
>
> -        for(i=0;i<n;i++) {
> -            *ptr = av_clip_int16(lrintf(*iptr++));
> -            ptr += incr;
> +            for(i=0;i<n;i++) {
> +                *ptr = av_clip_int16(lrintf(*iptr));
> +                ptr += incr;
> +                /* prepare for next block */
> +                iptr[0] = iptr[n];
> +                iptr++;
> +            }
>          }
> -        /* prepare for next block */
> -        memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
> -                s->frame_len * sizeof(float));
> +    } else {
> +        float *output[MAX_CHANNELS];
> +        for (ch = 0; ch < MAX_CHANNELS; ch++)
> +            output[ch] = s->frame_out[ch];
> +        s->dsp.float_to_int16_interleave(samples, (const float **)output, n, incr);
> +        for(ch = 0; ch < incr; ch++) {
> +            /* prepare for next block */
> +            memmove(&s->frame_out[ch][0], &s->frame_out[ch][n],
> +                    n * sizeof(float));
> +        }
>      }

This is way too hackish IMO.  The patch also looks more complicated
than it needs to be.

-- 
Måns Rullgård
mans at mansr.com