[FFmpeg-devel] [PATCH 3/3] x86: sbrdsp: implement SSE2 hf_apply_noise

Michael Niedermayer michaelni at gmx.at
Sun Apr 14 05:04:43 CEST 2013


On Sat, Apr 13, 2013 at 06:02:43PM +0200, Christophe Gisquet wrote:
> 2013/4/13 Michael Niedermayer <michaelni at gmx.at>:
> >> +%define count kxq
> >> +%else
> >> +%define count m_maxq
> >> +%endif
> >> +    dec    noiseq
> >> +    shl    count, 2
> >> +%if NREGS
> >> +    lea       r5q, [sbr_noise_table]
> >
> > count and r5q end up being the same register here on x86_64 linux shared
> 
> Fixed that in the fashion we discussed. Also replaced the cmpeqps by
> pcmpeqd for a 2 cycles gain.

It still seems to crash on Linux x86_64 with a shared build:
   0x00007ffff722d0f5 <apply_noise_main+0>: dec    %rcx
   0x00007ffff722d0f8 <apply_noise_main+3>: shl    $0x2,%r9
   0x00007ffff722d0fc <apply_noise_main+7>: lea    0x1a95d(%rip),%rax        # 0x7ffff7247a60 <ff_sbr_noise_table>
   0x00007ffff722d103 <apply_noise_main+14>:    lea    (%rdi,%r9,2),%rdi
   0x00007ffff722d107 <apply_noise_main+18>:    add    %r9,%rsi
   0x00007ffff722d10a <apply_noise_main+21>:    add    %r9,%rdx
   0x00007ffff722d10d <apply_noise_main+24>:    shl    $0x3,%rcx
   0x00007ffff722d111 <apply_noise_main+28>:    pxor   %xmm5,%xmm5
   0x00007ffff722d115 <apply_noise_main+32>:    neg    %r9
   0x00007ffff722d118 <apply_noise_main.loop+0>:    movdqa (%rdx,%r9,1),%xmm1
   0x00007ffff722d11e <apply_noise_main.loop+6>:    movdqu 0x10(%rcx,%rax,1),%xmm3
   0x00007ffff722d124 <apply_noise_main.loop+12>:   movdqu 0x20(%rcx,%rax,1),%xmm4
   0x00007ffff722d12a <apply_noise_main.loop+18>:   add    $0x20,%rcx
   0x00007ffff722d12e <apply_noise_main.loop+22>:   and    $0xff8,%rcx
   0x00007ffff722d135 <apply_noise_main.loop+29>:   movdqa %xmm1,%xmm2
   0x00007ffff722d139 <apply_noise_main.loop+33>:   punpckhdq %xmm1,%xmm2
   0x00007ffff722d13d <apply_noise_main.loop+37>:   punpckldq %xmm1,%xmm1
   0x00007ffff722d141 <apply_noise_main.loop+41>:   mulps  %xmm3,%xmm1
   0x00007ffff722d144 <apply_noise_main.loop+44>:   mulps  %xmm4,%xmm2
=> 0x00007ffff722d147 <apply_noise_main.loop+47>:   movdqa (%rsi,%r9,1),%xmm3
   0x00007ffff722d14d <apply_noise_main.loop+53>:   movdqa %xmm3,%xmm4
   0x00007ffff722d151 <apply_noise_main.loop+57>:   punpckhdq %xmm3,%xmm4
   0x00007ffff722d155 <apply_noise_main.loop+61>:   punpckldq %xmm3,%xmm3
   0x00007ffff722d159 <apply_noise_main.loop+65>:   movdqa %xmm3,%xmm6
   0x00007ffff722d15d <apply_noise_main.loop+69>:   pcmpeqd %xmm5,%xmm6
   0x00007ffff722d161 <apply_noise_main.loop+73>:   movdqa %xmm4,%xmm7
   0x00007ffff722d165 <apply_noise_main.loop+77>:   pcmpeqd %xmm5,%xmm7

rax            0x7ffff7247a60   140737339751008
rbx            0x23 35
rcx            0x590    1424
rdx            0x27fffd4d61650  703686717609552
rsi            0x27fffd4da1350  703686717870928
rdi            0x47fffb1b4d9d0  1266636081650128
rbp            0x7fffffffd5b0   0x7fffffffd5b0
rsp            0x7fffffffd368   0x7fffffffd368
r8             0x0  0
r9             0xfffe000023227cb0   -562949363958608
r10            0x2b 43
r11            0x23 35
r12            0x7ffff7f5b720   140737353463584
r13            0x7ffff7f5b780   140737353463680
r14            0x1  1
r15            0x23 35
rip            0x7ffff722d147   0x7ffff722d147 <apply_noise_main.loop+47>

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130414/0f86b609/attachment.asc>


More information about the ffmpeg-devel mailing list