[Ffmpeg-cvslog] r5898 - in trunk/libavcodec: dsputil.c dsputil.h i386/dsputil_mmx.c vorbis.c vorbis.h
lorenm
subversion
Thu Aug 3 05:18:48 CEST 2006
Author: lorenm
Date: Thu Aug 3 05:18:47 2006
New Revision: 5898
Modified:
trunk/libavcodec/dsputil.c
trunk/libavcodec/dsputil.h
trunk/libavcodec/i386/dsputil_mmx.c
trunk/libavcodec/vorbis.c
trunk/libavcodec/vorbis.h
Log:
sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8).
Modified: trunk/libavcodec/dsputil.c
==============================================================================
--- trunk/libavcodec/dsputil.c (original)
+++ trunk/libavcodec/dsputil.c Thu Aug 3 05:18:47 2006
@@ -35,6 +35,9 @@
/* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
+/* vorbis.c */
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t squareTbl[512] = {0, };
@@ -4090,6 +4093,10 @@
c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif
+#ifdef CONFIG_VORBIS_DECODER
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+
c->shrink[0]= ff_img_copy_plane;
c->shrink[1]= ff_shrink22;
c->shrink[2]= ff_shrink44;
Modified: trunk/libavcodec/dsputil.h
==============================================================================
--- trunk/libavcodec/dsputil.h (original)
+++ trunk/libavcodec/dsputil.h Thu Aug 3 05:18:47 2006
@@ -307,6 +307,8 @@
void (*h261_loop_filter)(uint8_t *src, int stride);
+ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*fdct248)(DCTELEM *block/* align 16*/);
Modified: trunk/libavcodec/i386/dsputil_mmx.c
==============================================================================
--- trunk/libavcodec/i386/dsputil_mmx.c (original)
+++ trunk/libavcodec/i386/dsputil_mmx.c Thu Aug 3 05:18:47 2006
@@ -2711,6 +2711,59 @@
}
#endif
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{ // In-place coupling of mag[]/ang[], two floats per pass. NOTE(review): pfcmpge/pfadd/pfsub are 3DNow! opcodes, not SSE - confirm the CPU-flag dispatch.
+ int i;
+ asm volatile("pxor %%mm7, %%mm7":); // mm7 = 0.0, the comparison constant; relies on mm7 surviving between asm statements
+ for(i=0; i<blocksize; i+=2) { // step of 2: assumes blocksize is even - TODO confirm with caller
+ asm volatile(
+ "movq %0, %%mm0 \n\t" // mm0 = m (mag[i], mag[i+1])
+ "movq %1, %%mm1 \n\t" // mm1 = a (ang[i], ang[i+1])
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // mask = (m >= 0.0); NOTE(review): the C version tests mag > 0.0
+ "pfcmpge %%mm7, %%mm3 \n\t" // mask = (a >= 0.0)
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit
+ "pxor %%mm2, %%mm1 \n\t" // a ^= sign(m): negates a where m >= 0
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t" // mm3 = (a >= 0) ? flipped a : 0
+ "pandn %%mm1, %%mm4 \n\t" // mm4 = (a < 0) ? flipped a : 0
+ "pfadd %%mm0, %%mm3 \n\t" // new a = m + ((a>=0) & (a ^ sign(m)))
+ "pfsub %%mm4, %%mm0 \n\t" // new m = m - ((a<0) & (a ^ sign(m)))
+ "movq %%mm3, %1 \n\t" // store both new ang values
+ "movq %%mm0, %0 \n\t" // store both new mag values
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory" // no inputs; presumably clobbers memory because each movq also touches element i+1
+ );
+ }
+ asm volatile("emms"); // leave MMX state so x87 floating point is usable again
+}
+static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize)
+{ // In-place coupling of mag[]/ang[], four floats per pass in XMM registers.
+ int i;
+ for(i=0; i<blocksize; i+=4) { // step of 4: assumes blocksize is a multiple of 4 - TODO confirm with caller
+ asm volatile(
+ "movaps %0, %%xmm0 \n\t" // xmm0 = m (mag[i..i+3]); movaps needs 16-byte alignment - TODO confirm buffers are aligned
+ "movaps %1, %%xmm1 \n\t" // xmm1 = a (ang[i..i+3])
+ "pxor %%xmm2, %%xmm2 \n\t" // xmm2 = 0.0
+ "pxor %%xmm3, %%xmm3 \n\t" // xmm3 = 0.0
+ "cmpleps %%xmm0, %%xmm2 \n\t" // mask = (0.0 <= m); NOTE(review): the C version tests mag > 0.0
+ "cmpleps %%xmm1, %%xmm3 \n\t" // mask = (0.0 <= a)
+ "pslld $31, %%xmm2 \n\t" // keep only the sign bit
+ "pxor %%xmm2, %%xmm1 \n\t" // a ^= sign(m): negates a where m >= 0
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "pand %%xmm1, %%xmm3 \n\t" // xmm3 = (a >= 0) ? flipped a : 0
+ "pandn %%xmm1, %%xmm4 \n\t" // xmm4 = (a < 0) ? flipped a : 0
+ "addps %%xmm0, %%xmm3 \n\t" // new a = m + ((a>=0) & (a ^ sign(m)))
+ "subps %%xmm4, %%xmm0 \n\t" // new m = m - ((a<0) & (a ^ sign(m)))
+ "movaps %%xmm3, %1 \n\t" // store four new ang values
+ "movaps %%xmm0, %0 \n\t" // store four new mag values
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory" // no inputs; presumably clobbers memory because each movaps also touches elements i+1..i+3
+ );
+ }
+}
+
#ifdef CONFIG_SNOW_ENCODER
extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
@@ -3137,6 +3190,11 @@
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
#endif
+
+ if(mm_flags & MM_SSE2)
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
+ else if(mm_flags & MM_SSE)
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
}
#ifdef CONFIG_ENCODERS
Modified: trunk/libavcodec/vorbis.c
==============================================================================
--- trunk/libavcodec/vorbis.c (original)
+++ trunk/libavcodec/vorbis.c Thu Aug 3 05:18:47 2006
@@ -929,6 +929,7 @@
int i, j, hdr_type;
vc->avccontext = avccontext;
+ dsputil_init(&vc->dsp, avccontext);
if (!headers_len) {
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
@@ -1443,6 +1444,31 @@
return 0;
}
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
+{ // Scalar version: rewrites mag[i] and ang[i] in place for i in [0, blocksize), branching on the sign of each value.
+ int i;
+ for(i=0; i<blocksize; i++)
+ {
+ if (mag[i]>0.0) {
+ if (ang[i]>0.0) {
+ ang[i]=mag[i]-ang[i]; // mag > 0, ang > 0: mag is left unchanged
+ } else {
+ float temp=ang[i]; // saved because ang[i] is overwritten before it is used
+ ang[i]=mag[i];
+ mag[i]+=temp; // mag > 0, ang <= 0: mag becomes mag + old ang
+ }
+ } else {
+ if (ang[i]>0.0) {
+ ang[i]+=mag[i]; // mag <= 0, ang > 0: mag is left unchanged
+ } else {
+ float temp=ang[i]; // saved because ang[i] is overwritten before it is used
+ ang[i]=mag[i];
+ mag[i]-=temp; // mag <= 0, ang <= 0: mag becomes mag - old ang
+ }
+ }
+ }
+}
+
// Decode the audio packet using the functions above
#define BIAS 385
@@ -1541,26 +1567,7 @@
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
- for(j=0;j<blocksize/2;++j) {
- float temp;
- if (mag[j]>0.0) {
- if (ang[j]>0.0) {
- ang[j]=mag[j]-ang[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]+=temp;
- }
- } else {
- if (ang[j]>0.0) {
- ang[j]+=mag[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]-=temp;
- }
- }
- }
+ vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
}
// Dotproduct
Modified: trunk/libavcodec/vorbis.h
==============================================================================
--- trunk/libavcodec/vorbis.h (original)
+++ trunk/libavcodec/vorbis.h Thu Aug 3 05:18:47 2006
@@ -87,6 +87,7 @@
typedef struct vorbis_context_s {
AVCodecContext *avccontext;
GetBitContext gb;
+ DSPContext dsp;
MDCTContext mdct0;
MDCTContext mdct1;
More information about the ffmpeg-cvslog
mailing list