[FFmpeg-cvslog] r22828 - in trunk/libavcodec: arm/Makefile arm/fft_init_arm.c arm/synth_filter_neon.S synth_filter.h
mru
subversion
Sat Apr 10 18:27:56 CEST 2010
Author: mru
Date: Sat Apr 10 18:27:56 2010
New Revision: 22828
Log:
ARM: NEON optimised synth_filter_float
2.7x faster DCA decoding on Cortex-A8
Added:
trunk/libavcodec/arm/synth_filter_neon.S
Modified:
trunk/libavcodec/arm/Makefile
trunk/libavcodec/arm/fft_init_arm.c
trunk/libavcodec/synth_filter.h
Modified: trunk/libavcodec/arm/Makefile
==============================================================================
--- trunk/libavcodec/arm/Makefile Sat Apr 10 18:27:53 2010 (r22827)
+++ trunk/libavcodec/arm/Makefile Sat Apr 10 18:27:56 2010 (r22828)
@@ -33,6 +33,8 @@ NEON-OBJS-$(CONFIG_H264DSP) +
arm/h264idct_neon.o \
arm/h264pred_neon.o \
+NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_neon.o \
+
NEON-OBJS-$(CONFIG_VP3_DECODER) += arm/vp3dsp_neon.o
OBJS-$(HAVE_NEON) += arm/dsputil_init_neon.o \
Modified: trunk/libavcodec/arm/fft_init_arm.c
==============================================================================
--- trunk/libavcodec/arm/fft_init_arm.c Sat Apr 10 18:27:53 2010 (r22827)
+++ trunk/libavcodec/arm/fft_init_arm.c Sat Apr 10 18:27:56 2010 (r22828)
@@ -19,6 +19,7 @@
*/
#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
@@ -29,6 +30,12 @@ void ff_mdct_calc_neon(FFTContext *s, FF
void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
+void ff_synth_filter_float_neon(FFTContext *imdct,
+ float *synth_buf_ptr, int *synth_buf_offset,
+ float synth_buf2[32], const float window[512],
+ float out[32], const float in[32],
+ float scale, float bias);
+
av_cold void ff_fft_init_arm(FFTContext *s)
{
if (HAVE_NEON) {
@@ -48,3 +55,11 @@ av_cold void ff_rdft_init_arm(RDFTContex
s->rdft_calc = ff_rdft_calc_neon;
}
#endif
+
+#if CONFIG_DCA_DECODER /* compiled only when CONFIG_DCA_DECODER is non-zero */
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) /* ARM-specific setup of the function pointer in *s */
+{
+ if (HAVE_NEON) /* *s is left untouched when HAVE_NEON is zero */
+ s->synth_filter_float = ff_synth_filter_float_neon; /* route synth_filter_float to the NEON routine */
+}
+#endif /* CONFIG_DCA_DECODER */
Added: trunk/libavcodec/arm/synth_filter_neon.S
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ trunk/libavcodec/arm/synth_filter_neon.S Sat Apr 10 18:27:56 2010 (r22828)
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+ preserve8
+
+function ff_synth_filter_float_neon, export=1
+ push {r3-r11,lr} @ save r3 together with r4-r11 and lr (10 words)
+ @ Overview: call ff_imdct_half_neon on the input, then run 4 passes of windowed multiply-accumulate; each pass writes 2 x 4 floats to out and 2 x 4 floats to synth_buf2
+ ldr r4, [r2] @ synth_buf_offset
+ add r1, r1, r4, lsl #2 @ synth_buf (base pointer + offset floats)
+ sub r12, r4, #32 @ r12 = offset - 32
+ bfc r12, #9, #23 @ keep bits 0-8 only, so the result wraps into 0..511
+ bic r4, r4, #63 @ r4 = offset with its low 6 bits cleared
+ str r12, [r2] @ write the new offset back through the pointer
+
+ ldr r2, [sp, #12*4] @ in (stack argument, sits above the 10 pushed words)
+ mov r9, r1 @ synth_buf (copy kept in r9 and used after the call)
+
+ bl ff_imdct_half_neon @ called with r0 as received, r1 = synth_buf, r2 = in
+ pop {r3} @ reload the r3 value pushed at entry; r4-r11 and lr stay on the stack
+ @ Stack offsets below are one word smaller than before the pop
+ ldr r5, [sp, #9*4] @ window
+ ldr r2, [sp, #10*4] @ out
+ vldr d0, [sp, #12*4] @ scale, bias (used below as d0[0] and d0[1])
+ add r8, r9, #12*4 @ r8 = synth_buf + 12 floats
+
+ mov lr, #64*4 @ post-increment step of every load in loop 2: 64 floats
+ mov r1, #4 @ loop 1 runs 4 times
+1:
+ add r10, r9, #16*4 @ synth_buf + 16 floats
+ add r11, r8, #16*4 @ r8 + 16 floats
+ add r0, r5, #16*4 @ window + 16 floats
+ add r6, r5, #32*4 @ window + 32 floats
+ add r7, r5, #48*4 @ window + 48 floats
+
+ vld1.32 {q10}, [r3,:128] @ a (4 floats read from synth_buf2)
+ add r3, r3, #16*4 @ move to synth_buf2 + 16 floats
+ vld1.32 {q1}, [r3,:128] @ b (4 floats read 16 floats further on)
+ vmov.f32 q2, #0.0 @ c
+ vmov.f32 q3, #0.0 @ d
+ @ Loop 2: loads are interleaved with the multiply-accumulates; q8, q9, q11, q12 are reused as scratch
+ mov r12, #512 @ loop 2 counter, reduced by 64 per pass (8 passes)
+2:
+ vld1.32 {q9}, [r8, :128], lr @ 4 floats via r8, then r8 += lr
+ vrev64.32 q9, q9 @ swap the two floats inside each 64-bit half
+ vld1.32 {q8}, [r5, :128], lr @ 4 window floats via r5
+ vmls.f32 d20, d16, d19 @ a(low) -= r5 window(low) * swapped r8 data(high)
+ vld1.32 {q11}, [r0, :128], lr @ 4 window floats via r0
+ vmls.f32 d21, d17, d18 @ a(high) -= r5 window(high) * swapped r8 data(low); net effect: r8 floats used in reverse order
+ vld1.32 {q12}, [r9, :128], lr @ 4 floats via r9
+ vmla.f32 d2, d22, d24 @ b(low) += r0 window(low) * r9 data(low)
+ vld1.32 {q8}, [r6, :128], lr @ 4 window floats via r6
+ vmla.f32 d3, d23, d25 @ b(high) += r0 window(high) * r9 data(high)
+ vld1.32 {q9}, [r10,:128], lr @ 4 floats via r10
+ vmla.f32 d4, d16, d18 @ c(low) += r6 window(low) * r10 data(low)
+ vld1.32 {q12}, [r11,:128], lr @ 4 floats via r11
+ vmla.f32 d5, d17, d19 @ c(high) += r6 window(high) * r10 data(high)
+ vrev64.32 q12, q12 @ swap the two floats inside each 64-bit half
+ vld1.32 {q11}, [r7, :128], lr @ 4 window floats via r7
+ vmla.f32 d6, d22, d25 @ d(low) += r7 window(low) * swapped r11 data(high)
+ vmla.f32 d7, d23, d24 @ d(high) += r7 window(high) * swapped r11 data(low)
+ subs r12, r12, #64 @ one pass done
+ beq 3f @ counter hit zero: leave loop 2
+ cmp r12, r4 @ has the counter reached the rounded-down offset?
+ bne 2b @ no: next pass with the pointers as they are
+ sub r8, r8, #512*4 @ yes: move all four synth_buf pointers back by 512 floats
+ sub r9, r9, #512*4 @ (presumably the wrap point of a 512-float ring buffer - TODO confirm buffer size)
+ sub r10, r10, #512*4
+ sub r11, r11, #512*4
+ b 2b
+3:
+ vdup.32 q8, d0[1] @ q8 = bias in all four lanes
+ vdup.32 q9, d0[1] @ q9 = bias in all four lanes
+ vmla.f32 q8, q10, d0[0] @ q8 = a * scale + bias
+ vmla.f32 q9, q1, d0[0] @ q9 = b * scale + bias
+ vst1.32 {q3}, [r3,:128] @ store d where b was read (synth_buf2 + 16 floats)
+ sub r3, r3, #16*4 @ back to where a was read
+ vst1.32 {q2}, [r3,:128] @ store c where a was read
+ vst1.32 {q8}, [r2,:128] @ store q8 to out
+ add r2, r2, #16*4 @ out + 16 floats
+ vst1.32 {q9}, [r2,:128] @ store q9 to out + 16 floats
+
+ subs r1, r1, #1 @ count down loop 1
+ popeq {r4-r11,pc} @ after the 4th pass: restore r4-r11 and return via the saved lr
+ @ Prepare the pointers for the next pass of loop 1 (r0, r6, r7, r10, r11 are recomputed at label 1)
+ cmp r4, #0 @ with r4 == 0 the compare in loop 2 never matched, so nothing was rewound there
+ subeq r8, r8, #512*4 @ in that case undo the 8 * 64-float advance here
+ subeq r9, r9, #512*4 @ same for r9
+ sub r5, r5, #512*4 @ undo the 8 * 64-float advance of the window pointer
+ sub r2, r2, #12*4 @ out (+16 floats above, -12 here: net +4 floats)
+ add r3, r3, #4*4 @ synth_buf2 (+4 floats)
+ add r5, r5, #4*4 @ window (+4 floats)
+ add r9, r9, #4*4 @ synth_buf (+4 floats)
+ sub r8, r8, #4*4 @ synth_buf (-4 floats: the r8 side steps downward)
+ b 1b
+endfunc
Modified: trunk/libavcodec/synth_filter.h
==============================================================================
--- trunk/libavcodec/synth_filter.h Sat Apr 10 18:27:53 2010 (r22827)
+++ trunk/libavcodec/synth_filter.h Sat Apr 10 18:27:56 2010 (r22828)
@@ -32,5 +32,6 @@ typedef struct SynthFilterContext {
} SynthFilterContext;
void ff_synth_filter_init(SynthFilterContext *c);
+void ff_synth_filter_init_arm(SynthFilterContext *c);
#endif /* AVCODEC_SYNTH_FILTER_H */
More information about the ffmpeg-cvslog
mailing list