[FFmpeg-cvslog] x86/lossless_videodsp: simplify and explicit aligned/unaligned flags
Clément Bœsch
git at videolan.org
Sat Jan 25 16:26:54 CET 2014
ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Thu Jan 23 07:58:09 2014 +0100| [cddbfd2a95540084fdf660b46fe755255f05d4df] | committer: Clément Bœsch
x86/lossless_videodsp: simplify and explicit aligned/unaligned flags
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cddbfd2a95540084fdf660b46fe755255f05d4df
---
libavcodec/x86/lossless_videodsp.asm | 82 +++++++++++-----------------------
1 file changed, 27 insertions(+), 55 deletions(-)
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index a1869b3..59ca733 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
SECTION_TEXT
-%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
+%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
movd m4, maskd
SPLATW m4, m4
add wq, wq
@@ -51,28 +51,16 @@ SECTION_TEXT
neg wq
jz %%.end
%%.loop:
-%if %1
- mova m0, [srcq+wq]
- mova m1, [dstq+wq]
- mova m2, [srcq+wq+mmsize]
- mova m3, [dstq+wq+mmsize]
-%else
- movu m0, [srcq+wq]
- movu m1, [dstq+wq]
- movu m2, [srcq+wq+mmsize]
- movu m3, [dstq+wq+mmsize]
-%endif
+ mov%1 m0, [srcq+wq]
+ mov%1 m1, [dstq+wq]
+ mov%1 m2, [srcq+wq+mmsize]
+ mov%1 m3, [dstq+wq+mmsize]
paddw m0, m1
paddw m2, m3
pand m0, m4
pand m2, m4
-%if %1
- mova [dstq+wq] , m0
- mova [dstq+wq+mmsize], m2
-%else
- movu [dstq+wq] , m0
- movu [dstq+wq+mmsize], m2
-%endif
+ mov%1 [dstq+wq] , m0
+ mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
@@ -81,7 +69,7 @@ SECTION_TEXT
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w
- ADD_INT16_LOOP 1
+ ADD_INT16_LOOP a
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w
@@ -89,11 +77,11 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- ADD_INT16_LOOP 1
+ ADD_INT16_LOOP a
.unaligned:
- ADD_INT16_LOOP 0
+ ADD_INT16_LOOP u
-%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
+%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
movd m4, maskd
SPLATW m4, m4
add wq, wq
@@ -114,28 +102,16 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
neg wq
jz %%.end
%%.loop:
-%if %1
- mova m0, [src1q+wq]
- mova m1, [src2q+wq]
- mova m2, [src1q+wq+mmsize]
- mova m3, [src2q+wq+mmsize]
-%else
- movu m0, [src1q+wq]
- movu m1, [src2q+wq]
- movu m2, [src1q+wq+mmsize]
- movu m3, [src2q+wq+mmsize]
-%endif
+ mov%1 m0, [src1q+wq]
+ mov%1 m1, [src2q+wq]
+ mov%1 m2, [src1q+wq+mmsize]
+ mov%1 m3, [src2q+wq+mmsize]
psubw m0, m1
psubw m2, m3
pand m0, m4
pand m2, m4
-%if %1
- mova [dstq+wq] , m0
- mova [dstq+wq+mmsize], m2
-%else
- movu [dstq+wq] , m0
- movu [dstq+wq+mmsize], m2
-%endif
+ mov%1 [dstq+wq] , m0
+ mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
@@ -144,7 +120,7 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
INIT_MMX mmx
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
- DIFF_INT16_LOOP 1
+ DIFF_INT16_LOOP a
INIT_XMM sse2
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@@ -154,22 +130,18 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- DIFF_INT16_LOOP 1
+ DIFF_INT16_LOOP a
.unaligned:
- DIFF_INT16_LOOP 0
+ DIFF_INT16_LOOP u
-%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
+%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
add wq, wq
add srcq, wq
add dstq, wq
neg wq
%%.loop:
-%if %2
- mova m1, [srcq+wq]
-%else
- movu m1, [srcq+wq]
-%endif
+ mov%2 m1, [srcq+wq]
mova m2, m1
pslld m1, 16
paddw m1, m2
@@ -185,7 +157,7 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
%endif
paddw m0, m1
pand m0, m7
-%if %1
+%ifidn %1, a
mova [dstq+wq], m0
%else
movq [dstq+wq], m0
@@ -214,7 +186,7 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
psllq m0, 48
movd m7, maskm
SPLATW m7 ,m7
- ADD_HFYU_LEFT_LOOP_INT16 1, 1
+ ADD_HFYU_LEFT_LOOP_INT16 a, a
INIT_XMM sse4
cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
@@ -229,11 +201,11 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
jnz .src_unaligned
test dstq, 15
jnz .dst_unaligned
- ADD_HFYU_LEFT_LOOP_INT16 1, 1
+ ADD_HFYU_LEFT_LOOP_INT16 a, a
.dst_unaligned:
- ADD_HFYU_LEFT_LOOP_INT16 0, 1
+ ADD_HFYU_LEFT_LOOP_INT16 u, a
.src_unaligned:
- ADD_HFYU_LEFT_LOOP_INT16 0, 0
+ ADD_HFYU_LEFT_LOOP_INT16 u, u
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
INIT_MMX mmxext
More information about the ffmpeg-cvslog mailing list