[Ffmpeg-devel] Mac OS X Intel last part: balign directives

John Dalgliesh johnd
Fri Aug 11 08:00:14 CEST 2006


Hi,

On Thu, 10 Aug 2006, Luca Abeni wrote:
>
> I think Rich Felker described the correct solution:
> ---
> The solution is to make a test in configure to test the behavior of
> align. First try assembling a file with .balign or .p2align. If one
> fails try the other. If both fail, first try .align 3. This will fail
> if .align takes its argument in bytes rather than as the number of low
> bits that must be zero; if it does try .align 8. If they all fail,
> disable alignment altogether and hope it's not needed. :) Then include
> a definition of BALIGN_8 and BALIGN_16 in config.h.
> ---
>
> So, configure should write the right "#define ASMALIGN8  ..." and
> "#define ASMALIGN16 ..." in config.h

Thanks, patch attached: balign-generalise.patch.
Determines whether .align's arg is a power-of-two exponent or a byte count,
then defines ASMALIGN appropriately in config.h. Changes all .baligns to
ASMALIGNs.

Notes:
- doesn't bother with .balign or .p2align; may as well use the standard 
.align directive (once we know what it does) if we're going to the trouble 
of testing for stuff.
- ASMALIGN's arg is ZEROBITS, i.e. num of low bits in addr to make zero. I 
wanted a general ASMALIGN macro, and 1<< is easier than log2/cntlzw/bsr.

Tested on Linux and OS X: no regression tests broken, and the .o's are
identical. On Linux, despite indications I could see in the gas source,
.align takes a byte count (NPOT). On OS X it takes a power-of-two exponent
(POT).

Comments?

I have also attached another patch for comparison, the minimally intrusive 
option 2 I was talking about before: balign-minimal.patch.
It also passes regressions and .o's are the same (when disassembled - 
source line num changes affect debugging info).

Oh, and I have left the commented-out .baligns in libswscale alone... I am
not sure how its being in an external repo complicates things, and, well,
they are commented out already.

{P^/
-------------- next part --------------
Index: configure
===================================================================
--- configure	(revision 5978)
+++ configure	(working copy)
@@ -468,6 +468,7 @@
 swscaler="no"
 gpl="no"
 memalignhack="no"
+asmalign_pot="unknown"
 
 # OS specific
 targetos=`uname -s`
@@ -1469,6 +1470,12 @@
     LDFLAGS="$LDFLAGS -p"
 fi
 
+# find if .align arg is power-of-two or not
+if test $asmalign_pot = "unknown"; then
+    asmalign_pot="no"
+    echo 'asm (".align 3");' | check_cc && asmalign_pot="yes"
+fi
+
 echo "install prefix   $PREFIX"
 echo "source path      $source_path"
 echo "C compiler       $cc"
@@ -1535,6 +1542,7 @@
 if test "$network" = "yes" ; then
     echo "IPv6 support         $ipv6"
 fi
+echo ".align is power-of-two" $asmalign_pot
 if test "$gpl" = "no" ; then
     echo "License: LGPL"
 else
@@ -2096,7 +2104,13 @@
   echo "AMR_CFLAGS=-DIF2=1" >> config.mak
 fi
 
+if test "$asmalign_pot" = "yes" ; then
+  echo '#define ASMALIGN(ZEROBITS) ".align " #ZEROBITS "\n\t"' >> $TMPH
+else
+  echo '#define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"' >> $TMPH
+fi
 
+
 for codec in $DECODER_LIST $ENCODER_LIST $PARSER_LIST $DEMUXER_LIST $MUXER_LIST; do
     echo "#define CONFIG_`echo $codec | tr a-z A-Z` 1" >> $TMPH
     echo "CONFIG_`echo $codec | tr a-z A-Z`=yes" >> config.mak
Index: libavcodec/i386/mpegvideo_mmx.c
===================================================================
--- libavcodec/i386/mpegvideo_mmx.c	(revision 5978)
+++ libavcodec/i386/mpegvideo_mmx.c	(working copy)
@@ -66,7 +66,7 @@
                 "packssdw %%mm5, %%mm5          \n\t"
                 "psubw %%mm5, %%mm7             \n\t"
                 "pxor %%mm4, %%mm4              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %3), %%mm0           \n\t"
                 "movq 8(%0, %3), %%mm1          \n\t"
@@ -129,7 +129,7 @@
                 "packssdw %%mm5, %%mm5          \n\t"
                 "psubw %%mm5, %%mm7             \n\t"
                 "pxor %%mm4, %%mm4              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %3), %%mm0           \n\t"
                 "movq 8(%0, %3), %%mm1          \n\t"
@@ -222,7 +222,7 @@
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -285,7 +285,7 @@
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -357,7 +357,7 @@
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
@@ -418,7 +418,7 @@
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "mov %3, %%"REG_a"              \n\t"
-                ".balign 16                     \n\t"
+                ASMALIGN(4)
                 "1:                             \n\t"
                 "movq (%0, %%"REG_a"), %%mm0    \n\t"
                 "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
Index: libavcodec/i386/motion_est_mmx.c
===================================================================
--- libavcodec/i386/motion_est_mmx.c	(revision 5978)
+++ libavcodec/i386/motion_est_mmx.c	(working copy)
@@ -34,7 +34,7 @@
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
@@ -70,7 +70,7 @@
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
@@ -92,7 +92,7 @@
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
@@ -118,7 +118,7 @@
 { //FIXME reuse src
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "movq "MANGLE(bone)", %%mm5     \n\t"
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
@@ -155,7 +155,7 @@
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm1    \n\t"
@@ -193,7 +193,7 @@
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm1    \n\t"
Index: libavcodec/i386/dsputil_mmx.c
===================================================================
--- libavcodec/i386/dsputil_mmx.c	(revision 5978)
+++ libavcodec/i386/dsputil_mmx.c	(working copy)
@@ -56,7 +56,7 @@
 static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL;
 static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL;
 
-#define JUMPALIGN() __asm __volatile (".balign 8"::)
+#define JUMPALIGN() __asm __volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd)  __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
 
 #define MOVQ_WONE(regd) \
@@ -204,7 +204,7 @@
     asm volatile(
         "mov $-128, %%"REG_a"           \n\t"
         "pxor %%mm7, %%mm7              \n\t"
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%0), %%mm0               \n\t"
         "movq (%0, %2), %%mm2           \n\t"
@@ -232,7 +232,7 @@
     asm volatile(
         "pxor %%mm7, %%mm7              \n\t"
         "mov $-128, %%"REG_a"           \n\t"
-        ".balign 16                     \n\t"
+        ASMALIGN(4)
         "1:                             \n\t"
         "movq (%0), %%mm0               \n\t"
         "movq (%1), %%mm2               \n\t"
@@ -375,7 +375,7 @@
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movd (%1), %%mm0              \n\t"
          "movd (%1, %3), %%mm1          \n\t"
@@ -401,7 +401,7 @@
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movq (%1), %%mm0              \n\t"
          "movq (%1, %3), %%mm1          \n\t"
@@ -427,7 +427,7 @@
 {
     __asm __volatile(
          "lea (%3, %3), %%"REG_a"       \n\t"
-         ".balign 8                     \n\t"
+         ASMALIGN(3)
          "1:                            \n\t"
          "movq (%1), %%mm0              \n\t"
          "movq 8(%1), %%mm4             \n\t"
Index: libavcodec/i386/dsputil_mmx_avg.h
===================================================================
--- libavcodec/i386/dsputil_mmx_avg.h	(revision 5978)
+++ libavcodec/i386/dsputil_mmx_avg.h	(working copy)
@@ -754,7 +754,7 @@
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
         PAVGB" 1(%1), %%mm0             \n\t"
-        ".balign 8                      \n\t"
+         ASMALIGN(3)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm2    \n\t"
         "movq (%1, %3), %%mm1           \n\t"
Index: libavcodec/i386/dsputil_mmx_rnd.h
===================================================================
--- libavcodec/i386/dsputil_mmx_rnd.h	(revision 5978)
+++ libavcodec/i386/dsputil_mmx_rnd.h	(working copy)
@@ -28,7 +28,7 @@
     MOVQ_BFE(mm6);
     __asm __volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
@@ -69,7 +69,7 @@
         "movq   %%mm4, (%3)             \n\t"
         "add    %5, %3                  \n\t"
         "decl   %0                      \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   (%2), %%mm1             \n\t"
@@ -110,7 +110,7 @@
     MOVQ_BFE(mm6);
     __asm __volatile(
         "lea        (%3, %3), %%"REG_a" \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
@@ -168,7 +168,7 @@
         "movq   %%mm5, 8(%3)            \n\t"
         "add    %5, %3                  \n\t"
         "decl   %0                      \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   (%2), %%mm1             \n\t"
@@ -206,7 +206,7 @@
     __asm __volatile(
         "lea (%3, %3), %%"REG_a"        \n\t"
         "movq (%1), %%mm0               \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
         "movq   (%1, %%"REG_a"),%%mm2   \n\t"
@@ -246,7 +246,7 @@
         "paddusw %%mm1, %%mm5           \n\t"
         "xor    %%"REG_a", %%"REG_a"    \n\t"
         "add    %3, %1                  \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %%"REG_a"), %%mm0  \n\t"
         "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
@@ -458,7 +458,7 @@
     __asm __volatile(
         "lea    (%3, %3), %%"REG_a"     \n\t"
         "movq   (%1), %%mm0             \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
         "movq   (%1, %%"REG_a"), %%mm2  \n\t"
@@ -509,7 +509,7 @@
         "paddusw %%mm1, %%mm5           \n\t"
         "xor    %%"REG_a", %%"REG_a"    \n\t"
         "add    %3, %1                  \n\t"
-        ".balign 8                      \n\t"
+        ASMALIGN(3)
         "1:                             \n\t"
         "movq   (%1, %%"REG_a"), %%mm0  \n\t"
         "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
Index: libavcodec/i386/simple_idct_mmx.c
===================================================================
--- libavcodec/i386/simple_idct_mmx.c	(revision 5978)
+++ libavcodec/i386/simple_idct_mmx.c	(working copy)
@@ -785,7 +785,7 @@
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "4:                             \n\t"
 Z_COND_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
@@ -860,7 +860,7 @@
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "6:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
 
@@ -926,7 +926,7 @@
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "2:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
 
@@ -1003,7 +1003,7 @@
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "3:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1067,7 +1067,7 @@
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f                         \n\t"
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "5:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1132,7 +1132,7 @@
         "jmp 9f                         \n\t"
 
 
-        "#.balign 16                    \n\t"\
+        "#" ASMALIGN(4)                      \
         "1:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1206,7 +1206,7 @@
         "jmp 9f                         \n\t"
 
 
-        "#.balign 16                    \n\t"
+        "#" ASMALIGN(4)
         "7:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
Index: libavcodec/i386/mpegvideo_mmx_template.c
===================================================================
--- libavcodec/i386/mpegvideo_mmx_template.c	(revision 5978)
+++ libavcodec/i386/mpegvideo_mmx_template.c	(working copy)
@@ -112,7 +112,7 @@
             "pxor %%mm6, %%mm6                  \n\t"
             "psubw (%3), %%mm6                  \n\t" // -bias[0]
             "mov $-128, %%"REG_a"               \n\t"
-            ".balign 16                         \n\t"
+            ASMALIGN(4)
             "1:                                 \n\t"
             "pxor %%mm1, %%mm1                  \n\t" // 0
             "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
@@ -156,7 +156,7 @@
             "pxor %%mm7, %%mm7                  \n\t" // 0
             "pxor %%mm4, %%mm4                  \n\t" // 0
             "mov $-128, %%"REG_a"               \n\t"
-            ".balign 16                         \n\t"
+            ASMALIGN(4)
             "1:                                 \n\t"
             "pxor %%mm1, %%mm1                  \n\t" // 0
             "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
-------------- next part --------------
Index: libavutil/common.h
===================================================================
--- libavutil/common.h	(revision 5978)
+++ libavutil/common.h	(working copy)
@@ -358,4 +358,25 @@
 void *av_realloc(void *ptr, unsigned int size);
 void av_free(void *ptr);
 
+#ifdef __APPLE__
+#define BALIGN_COMPAT asm(     \
+    ".macro .balign\n\t"       \
+    ".if $0 == (1<<1)\n\t" \
+    ".p2align 1\n\t"           \
+    ".elseif $0 == (1<<2)\n\t" \
+    ".p2align 2\n\t"           \
+    ".elseif $0 == (1<<3)\n\t" \
+    ".p2align 3\n\t"           \
+    ".elseif $0 == (1<<4)\n\t" \
+    ".p2align 4\n\t"           \
+    ".elseif $0 == (1<<5)\n\t" \
+    ".p2align 5\n\t"           \
+    ".else\n\t"                \
+    ".abort \".balign argument not recognised\"\n\t" \
+    ".endif\n\t"               \
+    ".endmacro\n\t" );
+#else
+#define BALIGN_COMPAT
+#endif
+
 #endif /* COMMON_H */
Index: libavcodec/i386/mpegvideo_mmx.c
===================================================================
--- libavcodec/i386/mpegvideo_mmx.c	(revision 5978)
+++ libavcodec/i386/mpegvideo_mmx.c	(working copy)
@@ -25,6 +25,8 @@
 #include "../avcodec.h"
 #include "x86_cpu.h"
 
+BALIGN_COMPAT
+
 extern uint8_t zigzag_direct_noperm[64];
 extern uint16_t inv_zigzag_direct16[64];
 
Index: libavcodec/i386/motion_est_mmx.c
===================================================================
--- libavcodec/i386/motion_est_mmx.c	(revision 5978)
+++ libavcodec/i386/motion_est_mmx.c	(working copy)
@@ -22,6 +22,8 @@
 #include "../dsputil.h"
 #include "x86_cpu.h"
 
+BALIGN_COMPAT
+
 static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
 0x0000000000000000ULL,
 0x0001000100010001ULL,
Index: libavcodec/i386/dsputil_mmx.c
===================================================================
--- libavcodec/i386/dsputil_mmx.c	(revision 5978)
+++ libavcodec/i386/dsputil_mmx.c	(working copy)
@@ -26,6 +26,8 @@
 #include "x86_cpu.h"
 #include "mmx.h"
 
+BALIGN_COMPAT
+
 //#undef NDEBUG
 //#include <assert.h>
 



More information about the ffmpeg-devel mailing list