[FFmpeg-cvslog] r25262 - trunk/libavcodec/x86/h264dsp_mmx.c


Author: rbultje
Date: Wed Sep 29 19:42:26 2010
New Revision: 25262

Log:
Move static inline function to a macro, so that constant propagation in
inline asm works for gcc-3.x also (hopefully). Should fix gcc-3.x FATE
breakage after r25254.
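
For context, the problem the macro works around: the iteration body feeds d_idx-derived
offsets to the asm through "i" (immediate) constraints, which require integer constant
expressions. With the av_always_inline function form, that only holds if the compiler
inlines the call and propagates the constant argument into the asm operand, which gcc 3.x
did not reliably do. A macro sidesteps the compiler entirely, since the constant is
substituted textually before compilation. Below is a minimal, self-contained sketch of
the pattern with hypothetical names (not the FFmpeg code); x86 only, GNU C, and the
function form assumes optimization is enabled so it can actually be inlined.

#include <stdint.h>

/* Function form: d_idx is formally a variable, so the "i" (immediate)
 * constraint is only satisfiable if the compiler inlines the call and
 * propagates the constant argument into the asm operand. gcc 3.x did not
 * reliably do this, which is the breakage the macro avoids. */
static inline __attribute__((always_inline))
int load_at_offset_fn(const uint8_t *p, int d_idx)
{
    int v;
    __asm__ volatile("movl %a2(%1), %0 \n\t"
                     : "=r"(v)
                     : "r"(p), "i"(d_idx + 4));
    return v;
}

/* Macro form: (d_idx) + 4 is textually a constant expression at every
 * expansion, so the "i" constraint holds regardless of the compiler's
 * constant propagation. */
#define LOAD_AT_OFFSET(v, p, d_idx)                  \
    __asm__ volatile("movl %a2(%1), %0 \n\t"         \
                     : "=r"(v)                       \
                     : "r"(p), "i"((d_idx) + 4))

/* buf must be at least 8 bytes long (both loads read 4 bytes at offset 4). */
int read_two(const uint8_t *buf)
{
    int a, b;
    a = load_at_offset_fn(buf, 0);  /* relies on inlining + propagation */
    LOAD_AT_OFFSET(b, buf, 0);      /* constant by construction */
    return a + b;
}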

Modified:
   trunk/libavcodec/x86/h264dsp_mmx.c

Modified: trunk/libavcodec/x86/h264dsp_mmx.c
==============================================================================
--- trunk/libavcodec/x86/h264dsp_mmx.c	Wed Sep 29 17:43:37 2010	(r25261)
+++ trunk/libavcodec/x86/h264dsp_mmx.c	Wed Sep 29 19:42:26 2010	(r25262)
@@ -63,123 +63,119 @@ void ff_h264_idct_add8_sse2      (uint8_
 /***********************************/
 /* deblocking */
 
-static av_always_inline void h264_loop_filter_strength_iteration_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
-                                                                      int8_t ref[2][40],   int16_t mv[2][40][2],
-                                                                      int bidir,   int edges, int step,
-                                                                      int mask_mv, int dir, const int d_idx,
-                                                                      const uint64_t mask_dir)
-{
-        x86_reg b_idx;
-        mask_mv <<= 3;
-        for( b_idx=0; b_idx<edges; b_idx+=step ) {
-            if (!mask_dir)
-            __asm__ volatile(
-                    "pxor %%mm0, %%mm0 \n\t"
-                    ::
-            );
-            if(!(mask_mv & b_idx)) {
-                if(bidir) {
-                    __asm__ volatile(
-                        "movd         %a3(%0,%2), %%mm2 \n"
-                        "punpckldq    %a4(%0,%2), %%mm2 \n" // { ref0[bn], ref1[bn] }
-                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" // { ref0[b], ref0[b] }
-                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" // { ref1[b], ref1[b] }
-                        "pshufw $0x4E, %%mm2, %%mm3 \n"
-                        "psubb         %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
-                        "psubb         %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
-
-                        "por           %%mm1, %%mm0 \n"
-                        "movq   %a5(%1,%2,4), %%mm1 \n"
-                        "movq   %a6(%1,%2,4), %%mm2 \n"
-                        "movq          %%mm1, %%mm3 \n"
-                        "movq          %%mm2, %%mm4 \n"
-                        "psubw   48(%1,%2,4), %%mm1 \n"
-                        "psubw   56(%1,%2,4), %%mm2 \n"
-                        "psubw  208(%1,%2,4), %%mm3 \n"
-                        "psubw  216(%1,%2,4), %%mm4 \n"
-                        "packsswb      %%mm2, %%mm1 \n"
-                        "packsswb      %%mm4, %%mm3 \n"
-                        "paddb         %%mm6, %%mm1 \n"
-                        "paddb         %%mm6, %%mm3 \n"
-                        "psubusb       %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "psubusb       %%mm5, %%mm3 \n"
-                        "packsswb      %%mm3, %%mm1 \n"
-
-                        "por           %%mm1, %%mm0 \n"
-                        "movq   %a7(%1,%2,4), %%mm1 \n"
-                        "movq   %a8(%1,%2,4), %%mm2 \n"
-                        "movq          %%mm1, %%mm3 \n"
-                        "movq          %%mm2, %%mm4 \n"
-                        "psubw   48(%1,%2,4), %%mm1 \n"
-                        "psubw   56(%1,%2,4), %%mm2 \n"
-                        "psubw  208(%1,%2,4), %%mm3 \n"
-                        "psubw  216(%1,%2,4), %%mm4 \n"
-                        "packsswb      %%mm2, %%mm1 \n"
-                        "packsswb      %%mm4, %%mm3 \n"
-                        "paddb         %%mm6, %%mm1 \n"
-                        "paddb         %%mm6, %%mm3 \n"
-                        "psubusb       %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "psubusb       %%mm5, %%mm3 \n"
-                        "packsswb      %%mm3, %%mm1 \n"
-
-                        "pshufw $0x4E, %%mm1, %%mm1 \n"
-                        "por           %%mm1, %%mm0 \n"
-                        "pshufw $0x4E, %%mm0, %%mm1 \n"
-                        "pminub        %%mm1, %%mm0 \n"
-                        ::"r"(ref),
-                          "r"(mv),
-                          "r"(b_idx),
-                          "i"(d_idx+12),
-                          "i"(d_idx+52),
-                          "i"(d_idx*4+48),
-                          "i"(d_idx*4+56),
-                          "i"(d_idx*4+208),
-                          "i"(d_idx*4+216)
-                    );
-                } else {
-                    __asm__ volatile(
-                        "movd   12(%0,%2), %%mm0 \n"
-                        "psubb %a3(%0,%2), %%mm0 \n" // ref[b] != ref[bn]
-                        "movq   48(%1,%2,4), %%mm1 \n"
-                        "movq   56(%1,%2,4), %%mm2 \n"
-                        "psubw %a4(%1,%2,4), %%mm1 \n"
-                        "psubw %a5(%1,%2,4), %%mm2 \n"
-                        "packsswb   %%mm2, %%mm1 \n"
-                        "paddb      %%mm6, %%mm1 \n"
-                        "psubusb    %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "packsswb   %%mm1, %%mm1 \n"
-                        "por        %%mm1, %%mm0 \n"
-                        ::"r"(ref),
-                          "r"(mv),
-                          "r"(b_idx),
-                          "i"(d_idx+12),
-                          "i"(d_idx*4+48),
-                          "i"(d_idx*4+56)
-                    );
-                }
-            }
-            __asm__ volatile(
-                "movd 12(%0,%1), %%mm1 \n"
-                "por %a2(%0,%1), %%mm1 \n" // nnz[b] || nnz[bn]
-                ::"r"(nnz),
-                  "r"(b_idx),
-                  "i"(d_idx+12)
-            );
-            __asm__ volatile(
-                "pminub    %%mm7, %%mm1 \n"
-                "pminub    %%mm7, %%mm0 \n"
-                "psllw        $1, %%mm1 \n"
-                "pxor      %%mm2, %%mm2 \n"
-                "pmaxub    %%mm0, %%mm1 \n"
-                "punpcklbw %%mm2, %%mm1 \n"
-                "movq      %%mm1, %a1(%0,%2) \n"
-                ::"r"(bS),
-                  "i"(32*dir),
-                  "r"(b_idx)
-                :"memory"
-            );
-        }
-}
+#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
+    do { \
+        x86_reg b_idx; \
+        mask_mv <<= 3; \
+        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
+            if (!mask_dir) \
+            __asm__ volatile( \
+                    "pxor %%mm0, %%mm0 \n\t" \
+                    :: \
+            ); \
+            if(!(mask_mv & b_idx)) { \
+                if(bidir) { \
+                    __asm__ volatile( \
+                        "movd         %a3(%0,%2), %%mm2 \n" \
+                        "punpckldq    %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
+                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
+                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
+                        "pshufw $0x4E, %%mm2, %%mm3 \n" \
+                        "psubb         %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
+                        "psubb         %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
+ \
+                        "por           %%mm1, %%mm0 \n" \
+                        "movq   %a5(%1,%2,4), %%mm1 \n" \
+                        "movq   %a6(%1,%2,4), %%mm2 \n" \
+                        "movq          %%mm1, %%mm3 \n" \
+                        "movq          %%mm2, %%mm4 \n" \
+                        "psubw   48(%1,%2,4), %%mm1 \n" \
+                        "psubw   56(%1,%2,4), %%mm2 \n" \
+                        "psubw  208(%1,%2,4), %%mm3 \n" \
+                        "psubw  216(%1,%2,4), %%mm4 \n" \
+                        "packsswb      %%mm2, %%mm1 \n" \
+                        "packsswb      %%mm4, %%mm3 \n" \
+                        "paddb         %%mm6, %%mm1 \n" \
+                        "paddb         %%mm6, %%mm3 \n" \
+                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "psubusb       %%mm5, %%mm3 \n" \
+                        "packsswb      %%mm3, %%mm1 \n" \
+ \
+                        "por           %%mm1, %%mm0 \n" \
+                        "movq   %a7(%1,%2,4), %%mm1 \n" \
+                        "movq   %a8(%1,%2,4), %%mm2 \n" \
+                        "movq          %%mm1, %%mm3 \n" \
+                        "movq          %%mm2, %%mm4 \n" \
+                        "psubw   48(%1,%2,4), %%mm1 \n" \
+                        "psubw   56(%1,%2,4), %%mm2 \n" \
+                        "psubw  208(%1,%2,4), %%mm3 \n" \
+                        "psubw  216(%1,%2,4), %%mm4 \n" \
+                        "packsswb      %%mm2, %%mm1 \n" \
+                        "packsswb      %%mm4, %%mm3 \n" \
+                        "paddb         %%mm6, %%mm1 \n" \
+                        "paddb         %%mm6, %%mm3 \n" \
+                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "psubusb       %%mm5, %%mm3 \n" \
+                        "packsswb      %%mm3, %%mm1 \n" \
+ \
+                        "pshufw $0x4E, %%mm1, %%mm1 \n" \
+                        "por           %%mm1, %%mm0 \n" \
+                        "pshufw $0x4E, %%mm0, %%mm1 \n" \
+                        "pminub        %%mm1, %%mm0 \n" \
+                        ::"r"(ref), \
+                          "r"(mv), \
+                          "r"(b_idx), \
+                          "i"(d_idx+12), \
+                          "i"(d_idx+52), \
+                          "i"(d_idx*4+48), \
+                          "i"(d_idx*4+56), \
+                          "i"(d_idx*4+208), \
+                          "i"(d_idx*4+216) \
+                    ); \
+                } else { \
+                    __asm__ volatile( \
+                        "movd   12(%0,%2), %%mm0 \n" \
+                        "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
+                        "movq   48(%1,%2,4), %%mm1 \n" \
+                        "movq   56(%1,%2,4), %%mm2 \n" \
+                        "psubw %a4(%1,%2,4), %%mm1 \n" \
+                        "psubw %a5(%1,%2,4), %%mm2 \n" \
+                        "packsswb   %%mm2, %%mm1 \n" \
+                        "paddb      %%mm6, %%mm1 \n" \
+                        "psubusb    %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "packsswb   %%mm1, %%mm1 \n" \
+                        "por        %%mm1, %%mm0 \n" \
+                        ::"r"(ref), \
+                          "r"(mv), \
+                          "r"(b_idx), \
+                          "i"(d_idx+12), \
+                          "i"(d_idx*4+48), \
+                          "i"(d_idx*4+56) \
+                    ); \
+                } \
+            } \
+            __asm__ volatile( \
+                "movd 12(%0,%1), %%mm1 \n" \
+                "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
+                ::"r"(nnz), \
+                  "r"(b_idx), \
+                  "i"(d_idx+12) \
+            ); \
+            __asm__ volatile( \
+                "pminub    %%mm7, %%mm1 \n" \
+                "pminub    %%mm7, %%mm0 \n" \
+                "psllw        $1, %%mm1 \n" \
+                "pxor      %%mm2, %%mm2 \n" \
+                "pmaxub    %%mm0, %%mm1 \n" \
+                "punpcklbw %%mm2, %%mm1 \n" \
+                "movq      %%mm1, %a1(%0,%2) \n" \
+                ::"r"(bS), \
+                  "i"(32*dir), \
+                  "r"(b_idx) \
+                :"memory" \
+            ); \
+        } \
+    } while (0)
 
 static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                             int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {


