[FFmpeg-devel] [PATCH 7/7] autocorrelate

Christophe Gisquet christophe.gisquet at gmail.com
Sat Apr 6 12:52:14 CEST 2013


---
 libavcodec/x86/sbrdsp.asm | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index a7998fa..77535a4 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -25,6 +25,7 @@ SECTION_RODATA
 ; mask equivalent for multiply by -1.0 1.0
 ps_mask         times 2 dd 1<<31, 0
 ps_mask2        times 2 dd 0, 1<<31
+ps_mask3        dd  0, 0, -1, -1
 ps_neg          times 4 dd 1<<31
 ps_noise0       times 2 dd  1.0,  0.0,
 ps_noise2       times 2 dd -1.0,  0.0
@@ -503,3 +504,73 @@ SBR_HF_APPLY_NOISE
 
 INIT_XMM avx
 SBR_HF_APPLY_NOISE
+
+INIT_XMM sse
+; void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2])
+cglobal sbr_autocorrelate, 2,3,8, x, phi, l
+    ; m6 = real_sum0  m7 = real_sum*
+    ; store:
+    ; m5l  <- (real_sum0) x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1]
+    ; m5h0 <- (real_sum1) x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1]
+    ; m5h1 <- (imag_sum1) x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0]
+    ; m7h0 <- real_sum2 = x[ 0][0] * x[ 2][0] + x[ 0][1] * x[ 2][1]
+    ; m7h1 <- imag_sum2 = x[ 0][0] * x[ 2][1] - x[ 0][1] * x[ 2][0]
+    movlps   m7, [xq]
+    add      xq, 8
+    mova     m2, m7
+    mova     m5, m7
+    movu     m3, [xq]
+    shufps   m7, m7, q0000
+    shufps   m2, m2, q1111
+    mulps    m7, m3
+    mulps    m2, m3
+    mova     m4, [ps_mask2]
+    mulps    m5, m5
+    shufps   m2, m2, q2301
+    xorps    m6, m6 ; real_sum0 = 0
+    xorps    m2, m4 ; [ps_mask2]
+    movlhps  m3, m3 ; x2 x2
+    addps    m7, m2 ; init real_sum2/imag_sum2
+    movlhps  m5, m7 ; store final value for real_sum1/imag_sum1
+    movlhps  m7, m6 ; real_sum1/imag_sum1 = 0
+    mov      lq, -37*8
+    sub      xq, lq
+.loop:
+        ;~ real_sum0 += x[i][0] * x[i  ][0] + x[i][1] * x[i  ][1];
+        ;~ real_sum1 += x[i][0] * x[i+1][0] + x[i][1] * x[i+1][1];
+        ;~ imag_sum1 += x[i][0] * x[i+1][1] - x[i][1] * x[i+1][0];
+        ;~ real_sum2 += x[i][0] * x[i+2][0] + x[i][1] * x[i+2][1];
+        ;~ imag_sum2 += x[i][0] * x[i+2][1] - x[i][1] * x[i+2][0];
+    movhps   m3, [xq + lq]     ; x3 x2
+    mova     m1, m0
+    mova     m2, m0
+    shufps   m0, m0, q0000     ; x0
+    shufps   m1, m1, q1111     ; x1
+    mulps    m2, m2            ; x0*x0
+    mulps    m0, m3            ; x0*V
+    mulps    m1, m3            ; x1*V
+    addps    m6, m2            ; real_sum0 += x0*x0
+    addps    m7, m0            ; real_sum* += x0*V
+    shufps   m1, m1, q2301
+    xorps    m1, m4 ; [ps_mask2]    ; x1*Vrev
+    movhlps  m0, m3            ; x1 -> x0
+    addps    m7, m1            ; real_sum* += x1*Vrev
+    movlhps  m3, m3            ; x2 x2
+    add      lq, 8
+    jl    .loop
+    ; phi[2-2][1][0] = real_sum2
+    ; phi[2-2][1][1] = imag_sum2
+    movhps  [phiq + 1*8], m7   ; store real_sum2/imag_sum2
+
+    ; m5l  <- (real_sum0) x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1]
+    ; m5h0 <- (real_sum1) x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1]
+    ; m5h1 <- (imag_sum1) x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0]
+    ; m7h0 <- real_sum2 = x[ 0][0] * x[ 2][0] + x[ 0][1] * x[ 2][1]
+    ; m7h1 <- imag_sum2 = x[ 0][0] * x[ 2][1] - x[ 0][1] * x[ 2][0]
+
+    ; phi[2  ][1][0] = real_sum0 + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1]
+    ; phi[1  ][0][0] = real_sum0 + x[38][0] * x[38][0] + x[38][1] * x[38][1]
+    ; phi[2-1][1][0] = real_sum1 + x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1]
+    ; phi[2-1][1][1] = imag_sum1 + x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0]
+    ; phi[0  ][0][0] = real_sum1 + x[38][0] * x[39][0] + x[38][1] * x[39][1]
+    ; phi[0  ][0][1] = imag_sum1 + x[38][0] * x[39][1] - x[38][1] * x[39][0]
-- 
1.8.0.msysgit.0



More information about the ffmpeg-devel mailing list