[FFmpeg-devel] [FFmpeg-commits] Implement a SIMD version of emulated_edge_mc() for x86.

Daniel Verkamp daniel
Mon Feb 7 08:18:06 CET 2011


On Mon, Jan 31, 2011 at 7:01 PM, Ronald S. Bultje <git at ffmpeg.org> wrote:
> Module: ffmpeg
> Branch: master
> Commit: 81f2a3f4ffcc6935b8b8ada4954700b3f333ae4f
>
> Author: Ronald S. Bultje <rsbultje at gmail.com>
> Date:   Mon Jan 31 20:55:56 2011 -0500
>
> Implement a SIMD version of emulated_edge_mc() for x86.

This crashes on a mingw-w64 build run on Win7 x64:

GNU gdb (GDB) 7.2
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-w64-mingw32".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from Z:\src\ffmpeg\ffmpeg-git\build-mingw-w64/ffplay_g.exe...done.
(gdb) r G:\files\video\1337.mp4
Starting program: Z:\src\ffmpeg\ffmpeg-git\build-mingw-w64/ffplay_g.exe G:\files\video\1337.mp4
[New Thread 4348.0x11dc]
[New Thread 4348.0x898]
[New Thread 4348.0xec8]
[New Thread 4348.0x710]
[New Thread 4348.0x10e4]
[New Thread 4348.0x121c]
[New Thread 4348.0x438]
[New Thread 4348.0xd10]
[New Thread 4348.0x124c]
[New Thread 4348.0x115c]
[New Thread 4348.0x1004]

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 4348.0x115c]
0x00000000008715eb in _ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop ()
(gdb) display /i $pc
1: x/i $pc
=> 0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>:
    movq   %mm0,(%rcx)
(gdb) disas
Dump of assembler code for function _ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop:
=> 0x00000000008715eb <+0>:     movq   %mm0,(%rcx)
   0x00000000008715ee <+3>:     movd   %mm1,0x8(%rcx)
   0x00000000008715f2 <+7>:     mov    %r9w,0xc(%rcx)
   0x00000000008715f7 <+12>:    mov    %al,0xe(%rcx)
   0x00000000008715fa <+15>:    add    %r8,%rcx
   0x00000000008715fd <+18>:    dec    %rsi
   0x0000000000871600 <+21>:    jne    0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>
End of assembler dump.
(gdb) info all-registers
rax            0x870e00 8850944
rbx            0x4449040        71602240
rcx            0x5130011        85131281
rdx            0x6b36d00        112422144
rsi            0xfffffffffffff640       -2496
rdi            0x11     17
rbp            0x1      0x1
rsp            0x679f5b0        0x679f5b0
r8             0x160    352
r9             0x0      0
r10            0x871600 8852992
r11            0x11     17
r12            0x6b36cff        112422143
r13            0x0      0
r14            0x0      0
r15            0xb0     176
rip            0x8715eb 0x8715eb <_ff_emu_edge_core_sse.emuedge_extend_bottom_15_loop>
eflags         0x10282  [ SF IF RF ]
cs             0x33     51
ss             0x282002b        42074155
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x2b0000 2818048
st0            -nan(0x8787878787878787) (raw 0xffff8787878787878787)
st1            -nan(0x87008700870087)   (raw 0xffff0087008700870087)
st2            -nan(0x8888888888888888) (raw 0xffff8888888888888888)
st3            -nan(0x88008800880088)   (raw 0xffff0088008800880088)
st4            -nan(0x86008600860086)   (raw 0xffff0086008600860086)
st5            -nan(0x86008600860086)   (raw 0xffff0086008600860086)
st6            -nan(0x86008600860086)   (raw 0xffff0086008600860086)
st7            -inf     (raw 0xffff0000000000000000)
fctrl          0x27f    639
fstat          0xff0000 16711680
ftag           0xff     255
fiseg          0x0      0
fioff          0x0      0
foseg          0x0      0
fooff          0x0      0
fop            0x0      0
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x34, 0x32, 0x34, 0x33, 0x33, 0x34, 0x32, 0x34, 0x34, 0x35,
    0x35, 0x34, 0x34, 0x36, 0x35, 0x33}, v8_int16 = {0x3234, 0x3334, 0x3433,
    0x3432, 0x3534, 0x3435, 0x3634, 0x3335}, v4_int32 = {0x33343234,
    0x34323433, 0x34353534, 0x33353634}, v2_int64 = {0x3432343333343234,
    0x3335363434353534}, uint128 = 0x33353634343535343432343333343234}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x33, 0x33, 0x34, 0x33, 0x33, 0x34, 0x33, 0x33, 0x33, 0x35,
    0x36, 0x35, 0x33, 0x33, 0x34, 0x35}, v8_int16 = {0x3333, 0x3334, 0x3433,
    0x3333, 0x3533, 0x3536, 0x3333, 0x3534}, v4_int32 = {0x33343333,
    0x33333433, 0x35363533, 0x35343333}, v2_int64 = {0x3333343333343333,
    0x3534333335363533}, uint128 = 0x35343333353635333333343333343333}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x34, 0x36, 0x34, 0x35, 0x35, 0x34, 0x36, 0x34, 0x35, 0x33,
    0x33, 0x34, 0x35, 0x34, 0x34, 0x34}, v8_int16 = {0x3634, 0x3534, 0x3435,
    0x3436, 0x3335, 0x3433, 0x3435, 0x3434}, v4_int32 = {0x35343634,
    0x34363435, 0x34333335, 0x34343435}, v2_int64 = {0x3436343535343634,
    0x3434343534333335}, uint128 = 0x34343435343333353436343535343634}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm6           {v4_float = {0x1, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0, 0x0, 0x80, 0x3f, 0x0 <repeats 12 times>}, v8_int16 = {0x0,
    0x3f80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x3f800000, 0x0, 0x0,
    0x0}, v2_int64 = {0x3f800000, 0x0},
  uint128 = 0x0000000000000000000000003f800000}
xmm7           {v4_float = {0x0, 0xa, 0x0, 0x0}, v2_double = {0xf4240, 0x0},
  v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84, 0x2e, 0x41, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x8480, 0x412e, 0x0, 0x0, 0x0,
    0x0}, v4_int32 = {0x0, 0x412e8480, 0x0, 0x0}, v2_int64 = {
    0x412e848000000000, 0x0}, uint128 = 0x0000000000000000412e848000000000}
xmm8           {v4_float = {0x0, 0x1, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xe0, 0x3f, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x3fe0, 0x0, 0x0, 0x0,
    0x0}, v4_int32 = {0x0, 0x3fe00000, 0x0, 0x0}, v2_int64 = {
    0x3fe0000000000000, 0x0}, uint128 = 0x00000000000000003fe0000000000000}
xmm9           {v4_float = {0xfdcb9e00, 0xffffffff, 0x0, 0x0}, v2_double = {
    0x0, 0x0}, v16_int8 = {0x80, 0x18, 0xd, 0xcc, 0xbe, 0xbb, 0xa6, 0xbf,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x1880, 0xcc0d,
    0xbbbe, 0xbfa6, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xcc0d1880, 0xbfa6bbbe,
    0x0, 0x0}, v2_int64 = {0xbfa6bbbecc0d1880, 0x0},
  uint128 = 0x0000000000000000bfa6bbbecc0d1880}
xmm10          {v4_float = {0x0, 0x2, 0x0, 0x0}, v2_double = {0xa, 0x0},
  v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x24, 0x40, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x4024, 0x0, 0x0, 0x0,
    0x0}, v4_int32 = {0x0, 0x40240000, 0x0, 0x0}, v2_int64 = {
    0x4024000000000000, 0x0}, uint128 = 0x00000000000000004024000000000000}
xmm11          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {
    0x8000000000000000, 0x0}, v16_int8 = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xffff,
    0xffff, 0xffff, 0x7fff, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xffffffff,
    0x7fffffff, 0x0, 0x0}, v2_int64 = {0x7fffffffffffffff, 0x0},
  uint128 = 0x00000000000000007fffffffffffffff}
xmm12          {v4_float = {0x15c28, 0x1, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x7b, 0x14, 0xae, 0x47, 0xe1, 0x7a, 0x84, 0x3f, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x147b, 0x47ae, 0x7ae1, 0x3f84,
    0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x47ae147b, 0x3f847ae1, 0x0, 0x0},
  v2_int64 = {0x3f847ae147ae147b, 0x0},
  uint128 = 0x00000000000000003f847ae147ae147b}
xmm13          {v4_float = {0x0, 0xa, 0x0, 0x0}, v2_double = {0xf4240, 0x0},
  v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84, 0x2e, 0x41, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x8480, 0x412e, 0x0, 0x0, 0x0,
    0x0}, v4_int32 = {0x0, 0x412e8480, 0x0, 0x0}, v2_int64 = {
    0x412e848000000000, 0x0}, uint128 = 0x0000000000000000412e848000000000}
xmm14          {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0,
    0x0}, v4_int32 = {0x0, 0x80000000, 0x0, 0x0}, v2_int64 = {
    0x8000000000000000, 0x0}, uint128 = 0x00000000000000008000000000000000}
xmm15          {v4_float = {0x0, 0xfffffff6, 0x0, 0x0}, v2_double = {
    0xfffffffffff0bdc0, 0x0}, v16_int8 = {0x0, 0x0, 0x0, 0x0, 0x80, 0x84,
    0x2e, 0xc1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0x0,
    0x0, 0x8480, 0xc12e, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0xc12e8480,
    0x0, 0x0}, v2_int64 = {0xc12e848000000000, 0x0},
  uint128 = 0x0000000000000000c12e848000000000}
mxcsr          0x1fa0   [ PE IM DM ZM OM UM PM ]
(gdb)

The crash also occurs during a normal ffmpeg conversion, not only when playing the file with ffplay.

Sample: http://drv.nu/temp/1337.mp4 - plays fine with that patch reverted.

Thanks,
-- Daniel Verkamp



More information about the ffmpeg-devel mailing list