[FFmpeg-cvslog] r12661 - in trunk/libavcodec/i386: dsputil_h264_template_mmx.c dsputil_h264_template_ssse3.c dsputil_mmx.c dsputil_mmx.h h264dsp_mmx.c

Mike Melanson mike
Tue Apr 1 18:47:51 CEST 2008


Loren Merritt wrote:
> They all crashed on the first inter frame. Can you provide a backtrace and 
> a disassembly of the chroma functions? (No icc here, and I'm not about to 
> install something that requires registration for a per-user license, even 
> if it is in portage.)

Certainly. I ran ffmpeg_g compiled with icc on the sample in this test case:

  http://fate.multimedia.cx/index.php?test_spec=7

And this is running on a VMware-hosted 32-bit Ubuntu session on a Core 2
Duo CPU. What's the best way to get a disassembly of the chroma functions?
objdump on the object files?


$ gdb ./ffmpeg_g
GNU gdb 6.6-debian
Copyright (C) 2006 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain
conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "i486-linux-gnu"...
Using host libthread_db library "/lib/tls/i686/cmov/libthread_db.so.1".


(gdb) r -f h264 -i /mnt/fate-suite/h264-conformance/AUD_MW_E.264 -f
framecrc -
Starting program: /home/melanson/ffmpeg/build-icc/ffmpeg_g -f h264 -i
/mnt/fate-suite/h264-conformance/AUD_MW_E.264 -f framecrc -
FFmpeg version SVN-r12665, Copyright (c) 2000-2008 Fabrice Bellard, et al.
  configuration: --cc=ccache /opt/intel/cc/10.1.012/bin/icc
  libavutil version: 49.6.0
  libavcodec version: 51.54.0
  libavformat version: 52.13.0
  libavdevice version: 52.0.0
  built on Apr  1 2008 07:44:12, gcc: Intel(R) C++ gcc 4.1 mode
Input #0, h264, from '/mnt/fate-suite/h264-conformance/AUD_MW_E.264':
  Duration: N/A, bitrate: N/A
    Stream #0.0: Video: h264, yuv420p, 176x144, 25.00 tb(r)
Output #0, framecrc, to 'pipe:':
    Stream #0.0: Video: rawvideo, yuv420p, 176x144, q=2-31, 200 kb/s,
25.00 tb(c)
Stream mapping:
  Stream #0.0 -> #0.0
Press [q] to stop encoding
0, 0, 38016, 0xa6d63b2e

Program received signal SIGSEGV, Segmentation fault.
0x080ed539 in put_h264_chroma_mc8_ssse3_rnd (
    dst=0x86a9798 '\200' <repeats 88 times>, "yyu", 't' <repeats 13
times>, '\200' <repeats 88 times>, "xxtttttu"...,
    src=0x8648281
"tttttttwwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv",
't' <repeats 32 times>, 'y' <repeats 12 times>, 't' <repeats 20 times>,
"wwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv", 't'
<repeats 32 times>, 'y' <repeats 12 times>, "ttttt"..., stride=104, h=4,
x=3, y=1)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/i386/h264dsp_mmx.c:1981
1981        put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);


(gdb) bt
#0  0x080ed539 in put_h264_chroma_mc8_ssse3_rnd (
    dst=0x86a9798 '\200' <repeats 88 times>, "yyu", 't' <repeats 13
times>, '\200' <repeats 88 times>, "xxtttttu"...,
    src=0x8648281
"tttttttwwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv",
't' <repeats 32 times>, 'y' <repeats 12 times>, 't' <repeats 20 times>,
"wwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv", 't'
<repeats 32 times>, 'y' <repeats 12 times>, "ttttt"..., stride=104, h=4,
x=3, y=1)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/i386/h264dsp_mmx.c:1981
#1  0x0825c0c4 in mc_part (h=0x68, n=4, square=5411, chroma_height=0,
    delta=140868432, dest_y=0x825bc63 "?\a???\213\234$\214",
    dest_cb=0x8657b64 "", dest_cr=0x8657b60 "", x_offset=1, y_offset=4,
    qpix_put=0x8, chroma_put=0x86a1090, qpix_avg=0x86a95f8,
    chroma_avg=0x86ab9f8, weight_op=0x0, weight_avg=0x4, list0=140872004,
    list1=135189392)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:1848
#2  0x0825bc63 in hl_motion (h=0x68, dest_y=0x4 <Address 0x4 out of bounds>,
    dest_cb=0x1523 <Address 0x1523 out of bounds>,
    dest_cr=0x86ab9f8 "\203\203\203\203\203\203\203\203", '\200'
<repeats 80 times>, '\203' <repeats 24 times>, '\200' <repeats 80
times>, "\203\203\203\203\203\203\203\203"..., qpix_put=0x8658904,
chroma_put=0x86588e0,
    qpix_avg=0x8658a04, chroma_avg=0x86588f8, weight_op=0x8658d04,
    weight_avg=0x8658d2c)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:1890
#3  0x08256ec6 in hl_decode_mb_simple (h=0x68)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:2714
#4  0x0824b3d8 in decode_slice (avctx=0x68, h=0x4)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:6822
#5  0x0824b2dc in execute_decode_slices (h=0x68, context_count=4)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7406
#6  0x0824b0bd in decode_nal_units (h=0x68,
    buf=0x4 <Address 0x4 out of bounds>, buf_size=5411)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7592
#7  0x0824a3d1 in decode_frame (avctx=0x862f3d0, data=0xbfce36f0,
    data_size=0xbfce37b0, buf=0x8682dc0 "", buf_size=364)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7722
#8  0x080e7e86 in avcodec_decode_video (avctx=0x16c, picture=0x8682dc0,
    got_picture_ptr=0x868eb20, buf=0x16c <Address 0x16c out of bounds>,
    buf_size=134601160)
    at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/utils.c:945
#9  0x0805d9c8 in output_packet (ist=0x68, ist_index=4, ost_table=0x1523,
    nb_ostreams=1, pkt=0x868eb20)
    at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:1112
#10 0x0805d014 in av_encode (output_files=0x68, nb_output_files=4,
    input_files=0x1523, nb_input_files=1, stream_maps=0x8527ba0,
    nb_stream_maps=0) at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:1992
#11 0x0805b63a in main (argc=8, argv=0xbfce3ef4)
    at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:3933


(gdb) disass $pc-32 $pc+32
Dump of assembler code from 0x80ed519 to 0x80ed559:
0x080ed519 <put_h264_chroma_mc8_ssse3_rnd+393>: fiaddl 0x4e8d08c2(%ebx)
0x080ed51f <put_h264_chroma_mc8_ssse3_rnd+399>: or     %cl,(%edi)
0x080ed521 <put_h264_chroma_mc8_ssse3_rnd+401>: scas   %es:(%edi),%eax
0x080ed522 <put_h264_chroma_mc8_ssse3_rnd+402>: lret   $0xc683
0x080ed525 <put_h264_chroma_mc8_ssse3_rnd+405>: or     %cl,(%edi)
0x080ed527 <put_h264_chroma_mc8_ssse3_rnd+407>: scas   %es:(%edi),%eax
0x080ed528 <put_h264_chroma_mc8_ssse3_rnd+408>: lock mov 0x28(%esp),%edx
0x080ed52d <put_h264_chroma_mc8_ssse3_rnd+413>: mov    0x24(%esp),%eax
0x080ed531 <put_h264_chroma_mc8_ssse3_rnd+417>: movd   %ecx,%xmm7
0x080ed535 <put_h264_chroma_mc8_ssse3_rnd+421>: movd   %esi,%xmm6
0x080ed539 <put_h264_chroma_mc8_ssse3_rnd+425>: movdqa (%esp),%xmm5
0x080ed53e <put_h264_chroma_mc8_ssse3_rnd+430>: pshuflw $0x0,%xmm7,%xmm7
0x080ed543 <put_h264_chroma_mc8_ssse3_rnd+435>: pshuflw $0x0,%xmm6,%xmm6
0x080ed548 <put_h264_chroma_mc8_ssse3_rnd+440>: movlhps %xmm7,%xmm7
0x080ed54b <put_h264_chroma_mc8_ssse3_rnd+443>: movlhps %xmm6,%xmm6
0x080ed54e <put_h264_chroma_mc8_ssse3_rnd+446>: mov    0x1c(%esp),%esi
0x080ed552 <put_h264_chroma_mc8_ssse3_rnd+450>: mov    0x20(%esp),%ecx
0x080ed556 <put_h264_chroma_mc8_ssse3_rnd+454>: movq   (%ecx),%xmm0
End of assembler dump.


(gdb) info all-registers
eax            0x68     104
ecx            0x1523   5411
edx            0x4      4
ebx            0x0      0
esp            0xbfce3274       0xbfce3274
ebp            0x8      0x8
esi            0x305    773
edi            0x200020 2097184
eip            0x80ed539        0x80ed539
<put_h264_chroma_mc8_ssse3_rnd+425>
eflags         0x10216  [ PF AF IF RF ]
cs             0x73     115
ss             0x7b     123
ds             0x7b     123
es             0x7b     123
fs             0x0      0
gs             0x33     51
st0            -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st1            -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st2            -nan(0x20e020e020e020e0) (raw 0xffff20e020e020e020e0)
st3            -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st4            -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st5            -nan(0x20002000200020)   (raw 0xffff0020002000200020)
st6            <invalid float value>    (raw 0xffff0000000000000000)
st7            <invalid float value>    (raw 0xffff0000000000000000)
fctrl          0x37f    895
fstat          0x120    288
ftag           0xaaaa   43690
fiseg          0x73     115
fioff          0x805ce8b        134598283
foseg          0x7b     123
fooff          0x0      0
fop            0x6d9    1753
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0xdb, 0x0, 0xdb, 0x0, 0xdb, 0x0, 0xde, 0x0, 0xdd, 0x0, 0xda,
    0x0, 0xd9, 0x0, 0xd9, 0x0}, v8_int16 = {0xdb, 0xdb, 0xdb, 0xde, 0xdd,
    0xda, 0xd9, 0xd9}, v4_int32 = {0xdb00db, 0xde00db, 0xda00dd, 0xd900d9},
  v2_int64 = {0xde00db00db00db, 0xd900d900da00dd},
  uint128 = 0x00d900d900da00dd00de00db00db00db}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0xb6, 0x1, 0xb3, 0x1, 0xb3, 0x1, 0xb3, 0x1, 0xbb, 0x1, 0xba,
    0x1, 0xb5, 0x1, 0xbb, 0x1}, v8_int16 = {0x1b6, 0x1b3, 0x1b3, 0x1b3,
0x1bb,
    0x1ba, 0x1b5, 0x1bb}, v4_int32 = {0x1b301b6, 0x1b301b3, 0x1ba01bb,
    0x1bb01b5}, v2_int64 = {0x1b301b301b301b6, 0x1bb01b501ba01bb},
  uint128 = 0x01bb01b501ba01bb01b301b301b301b6}
xmm2           {v4_float = {0x80000000, 0x0, 0x0, 0x0}, v2_double = {
    0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xd9, 0xd9, 0xd9,
    0xda, 0xda, 0xdb, 0xde, 0xdd, 0x6d, 0x6d, 0x6d, 0x6d, 0x6d, 0x6f, 0x72,
    0x6f}, v8_int16 = {0xd9d9, 0xdad9, 0xdbda, 0xddde, 0x6d6d, 0x6d6d,
0x6f6d,
    0x6f72}, v4_int32 = {0xdad9d9d9, 0xdddedbda, 0x6d6d6d6d, 0x6f726f6d},
  v2_int64 = {0xdddedbdadad9d9d9, 0x6f726f6d6d6d6d6d},
  uint128 = 0x6f726f6d6d6d6d6ddddedbdadad9d9d9}
xmm3           {v4_float = {0x80000000, 0x0, 0x0, 0x0}, v2_double = {
    0x8000000000000000, 0x0}, v16_int8 = {0xd9, 0xd9, 0xd9, 0xda, 0xda,
0xd8,
    0xd9, 0xdd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xd9d9,
    0xdad9, 0xd8da, 0xddd9, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xdad9d9d9,
    0xddd9d8da, 0x0, 0x0}, v2_int64 = {0xddd9d8dadad9d9d9, 0x0},
  uint128 = 0x0000000000000000ddd9d8dadad9d9d9}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0xdd, 0x0, 0xda, 0x0, 0xd9, 0x0, 0xd9, 0x0, 0xd9, 0x0, 0xda,
    0x0, 0xda, 0x0, 0xe2, 0x0}, v8_int16 = {0xdd, 0xda, 0xd9, 0xd9, 0xd9,
    0xda, 0xda, 0xe2}, v4_int32 = {0xda00dd, 0xd900d9, 0xda00d9, 0xe200da},
  v2_int64 = {0xd900d900da00dd, 0xe200da00da00d9},
  uint128 = 0x00e200da00da00d900d900d900da00dd}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0xc7, 0x1, 0xc7, 0x1, 0xc4, 0x1, 0xcb, 0x1, 0xc9, 0x1, 0xc4,
    0x1, 0xc3, 0x1, 0xbf, 0x1}, v8_int16 = {0x1c7, 0x1c7, 0x1c4, 0x1cb,
0x1c9,
    0x1c4, 0x1c3, 0x1bf}, v4_int32 = {0x1c701c7, 0x1cb01c4, 0x1c401c9,
    0x1bf01c3}, v2_int64 = {0x1cb01c401c701c7, 0x1bf01c301c401c9},
  uint128 = 0x01bf01c301c401c901cb01c401c701c7}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x5, 0x3, 0x0 <repeats 14 times>}, v8_int16 = {0x305, 0x0,
0x0,
    0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x305, 0x0, 0x0, 0x0},
v2_int64 = {
    0x305, 0x0}, uint128 = 0x00000000000000000000000000000305}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
  v16_int8 = {0x23, 0x15, 0x0 <repeats 14 times>}, v8_int16 = {0x1523, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x1523, 0x0, 0x0, 0x0},
  v2_int64 = {0x1523, 0x0}, uint128 = 0x00000000000000000000000000001523}
mxcsr          0x9fe0   [ PE DAZ IM DM ZM OM UM PM FZ ]
mm0            {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
    0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
    0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm1            {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
    0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
    0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm2            {uint64 = 0x20e020e020e020e0, v2_int32 = {0x20e020e0,
    0x20e020e0}, v4_int16 = {0x20e0, 0x20e0, 0x20e0, 0x20e0}, v8_int8 =
{0xe0,
    0x20, 0xe0, 0x20, 0xe0, 0x20, 0xe0, 0x20}}
mm3            {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
    0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
    0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm4            {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
    0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
    0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm5            {uint64 = 0x20002000200020, v2_int32 = {0x200020, 0x200020},
  v4_int16 = {0x20, 0x20, 0x20, 0x20}, v8_int8 = {0x20, 0x0, 0x20, 0x0,
0x20,
    0x0, 0x20, 0x0}}
mm6            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0,
    0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm7            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0,
    0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}


-- 
	-Mike Melanson




More information about the ffmpeg-cvslog mailing list