[FFmpeg-cvslog] Fix png decoding on x86.
Reimar Döffinger
git at videolan.org
Fri Feb 3 23:13:02 CET 2012
ffmpeg | branch: master | Reimar Döffinger <Reimar.Doeffinger at gmx.de> | Fri Feb 3 21:23:49 2012 +0100| [58dabf7bf2fdd08f79173da0df613127ff783028] | committer: Reimar Döffinger
Fix png decoding on x86.
Line sizes are only 8-byte aligned, so use unaligned loads
for add_bytes_l2 pointers.
Increasing the alignment requirement to 16 seemed a bit extreme
(png may be used for rather small sizes).
Also fix a mov that had its arguments swapped, leading to
add_bytes_l2 being applied to up to 8 bytes too few.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger at gmx.de>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=58dabf7bf2fdd08f79173da0df613127ff783028
---
libavcodec/pngdsp.h | 4 ++--
libavcodec/x86/pngdsp.asm | 18 +++++++++---------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index 98d29a8..f89a93a 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -26,8 +26,8 @@
typedef struct PNGDSPContext {
void (*add_bytes_l2)(uint8_t *dst /* align 16 */,
- uint8_t *src1 /* align 16 */,
- uint8_t *src2 /* align 16 */, int w);
+ uint8_t *src1,
+ uint8_t *src2, int w);
/* this might write to dst[w] */
void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src,
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index f3ec717..9c588a9 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -43,12 +43,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
and waq, ~(mmsize*2-1)
jmp .end_v
.loop_v:
- mova m0, [src1q+iq]
- mova m1, [src1q+iq+mmsize]
- paddb m0, [src2q+iq]
- paddb m1, [src2q+iq+mmsize]
- mova [dstq+iq ], m0
- mova [dstq+iq+mmsize], m1
+ movu m0, [src2q+iq]
+ movu m1, [src2q+iq+mmsize]
+ paddb m0, [src1q+iq]
+ paddb m1, [src1q+iq+mmsize]
+ movu [dstq+iq ], m0
+ movu [dstq+iq+mmsize], m1
add iq, mmsize*2
.end_v:
cmp iq, waq
@@ -56,12 +56,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
%if mmsize == 16
; vector loop
- mov wq, waq
+ mov waq, wq
and waq, ~7
jmp .end_l
.loop_l:
- movq mm0, [src1q+iq]
- paddb mm0, [src2q+iq]
+ movq mm0, [src2q+iq]
+ paddb mm0, [src1q+iq]
movq [dstq+iq ], mm0
add iq, 8
.end_l:
More information about the ffmpeg-cvslog
mailing list