FFmpeg: libavcodec/x86/mpegaudiodec

00001 /*
00002  * SSE optimized MP3 decoding functions
00003  * Copyright (c) 2010 Vitor Sessak
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "libavutil/cpu.h"
00023 #include "libavutil/x86_cpu.h"
00024 #include "libavcodec/dsputil.h"
00025 #include "libavcodec/mpegaudiodsp.h"
00026 
/* Multiply-accumulate: rt += ra * rb. */
#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
/* Multiply-subtract: rt -= ra * rb. */
#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))

/*
 * Apply op (MACS or MLSS) to sum for the 8 element pairs
 * w[k * 64], p[k * 64] with k = 0..7.
 *
 * w and p are each expanded 8 times, so they must be free of side effects.
 * The body is wrapped in do { } while (0) so that "SUM8(...);" is a single
 * statement and stays valid inside an unbraced if/else.
 */
#define SUM8(op, sum, w, p)               \
do {                                      \
    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
} while (0)
00041 
00042 static void apply_window(const float *buf, const float *win1,
00043                          const float *win2, float *sum1, float *sum2, int len)
00044 {
00045     x86_reg count = - 4*len;
00046     const float *win1a = win1+len;
00047     const float *win2a = win2+len;
00048     const float *bufa  = buf+len;
00049     float *sum1a = sum1+len;
00050     float *sum2a = sum2+len;
00051 
00052 
00053 #define MULT(a, b)                                 \
00054     "movaps " #a "(%1,%0), %%xmm1           \n\t"  \
00055     "movaps " #a "(%3,%0), %%xmm2           \n\t"  \
00056     "mulps         %%xmm2, %%xmm1           \n\t"  \
00057     "subps         %%xmm1, %%xmm0           \n\t"  \
00058     "mulps  " #b "(%2,%0), %%xmm2           \n\t"  \
00059     "subps         %%xmm2, %%xmm4           \n\t"  \
00060 
00061     __asm__ volatile(
00062             "1:                                   \n\t"
00063             "xorps       %%xmm0, %%xmm0           \n\t"
00064             "xorps       %%xmm4, %%xmm4           \n\t"
00065 
00066             MULT(   0,   0)
00067             MULT( 256,  64)
00068             MULT( 512, 128)
00069             MULT( 768, 192)
00070             MULT(1024, 256)
00071             MULT(1280, 320)
00072             MULT(1536, 384)
00073             MULT(1792, 448)
00074 
00075             "movaps      %%xmm0, (%4,%0)          \n\t"
00076             "movaps      %%xmm4, (%5,%0)          \n\t"
00077             "add            $16,  %0              \n\t"
00078             "jl              1b                   \n\t"
00079             :"+&r"(count)
00080             :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
00081             );
00082 
00083 #undef MULT
00084 }
00085 
00086 static void apply_window_mp3(float *in, float *win, int *unused, float *out,
00087                              int incr)
00088 {
00089     LOCAL_ALIGNED_16(float, suma, [17]);
00090     LOCAL_ALIGNED_16(float, sumb, [17]);
00091     LOCAL_ALIGNED_16(float, sumc, [17]);
00092     LOCAL_ALIGNED_16(float, sumd, [17]);
00093 
00094     float sum;
00095 
00096     /* copy to avoid wrap */
00097     memcpy(in + 512, in, 32 * sizeof(*in));
00098 
00099     apply_window(in + 16, win     , win + 512, suma, sumc, 16);
00100     apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
00101 
00102     SUM8(MACS, suma[0], win + 32, in + 48);
00103 
00104     sumc[ 0] = 0;
00105     sumb[16] = 0;
00106     sumd[16] = 0;
00107 
00108 #define SUMS(suma, sumb, sumc, sumd, out1, out2)               \
00109             "movups " #sumd "(%4),       %%xmm0          \n\t" \
00110             "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
00111             "subps  " #suma "(%1),       %%xmm0          \n\t" \
00112             "movaps        %%xmm0," #out1 "(%0)          \n\t" \
00113 \
00114             "movups " #sumc "(%3),       %%xmm0          \n\t" \
00115             "shufps         $0x1b,       %%xmm0, %%xmm0  \n\t" \
00116             "addps  " #sumb "(%2),       %%xmm0          \n\t" \
00117             "movaps        %%xmm0," #out2 "(%0)          \n\t"
00118 
00119     if (incr == 1) {
00120         __asm__ volatile(
00121             SUMS( 0, 48,  4, 52,  0, 112)
00122             SUMS(16, 32, 20, 36, 16,  96)
00123             SUMS(32, 16, 36, 20, 32,  80)
00124             SUMS(48,  0, 52,  4, 48,  64)
00125 
00126             :"+&r"(out)
00127             :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
00128             :"memory"
00129             );
00130         out += 16*incr;
00131     } else {
00132         int j;
00133         float *out2 = out + 32 * incr;
00134         out[0  ]  = -suma[   0];
00135         out += incr;
00136         out2 -= incr;
00137         for(j=1;j<16;j++) {
00138             *out  = -suma[   j] + sumd[16-j];
00139             *out2 =  sumb[16-j] + sumc[   j];
00140             out  += incr;
00141             out2 -= incr;
00142         }
00143     }
00144 
00145     sum = 0;
00146     SUM8(MLSS, sum, win + 16 + 32, in + 32);
00147     *out = sum;
00148 }
00149 
00150 void ff_mpadsp_init_mmx(MPADSPContext *s)
00151 {
00152     int mm_flags = av_get_cpu_flags();
00153 
00154     if (mm_flags & AV_CPU_FLAG_SSE2) {
00155         s->apply_window_float = apply_window_mp3;
00156     }
00157 }
libavcodec/x86/mpegaudiodec_mmx.c