[FFmpeg-devel] [PATCH] lavc/aarch64/simple_idct: separate macro arguments with commas

Matthieu Bouron matthieu.bouron at gmail.com
Thu May 11 15:31:53 EEST 2017


On Wed, May 10, 2017 at 08:23:02PM +0200, Matthieu Bouron wrote:
> On Tue, May 09, 2017 at 11:08:48PM +0200, Matthieu Bouron wrote:
> > On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bouron at gmail.com>
> > wrote:
> > 
> > >
> > >
> > > Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fouet at free.fr> a écrit :
> > >
> > > Hi,
> > >
> > >
> > > On 28/04/2017 21:58, Matthieu Bouron wrote:
> > > > Untested: fixes ticket #6324.
> > > > ---
> > > >  libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------
> > > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/libavcodec/aarch64/simple_idct_neon.S
> > > b/libavcodec/aarch64/simple_idct_neon.S
> > > > index 52273420f9..d31f72a609 100644
> > > > --- a/libavcodec/aarch64/simple_idct_neon.S
> > > > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > > > @@ -61,19 +61,19 @@ endconst
> > > >          br              x10
> > > >  .endm
> > > >
> > > > -.macro smull1 a b c
> > > > +.macro smull1 a, b, c
> > > >          smull           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro smlal1 a b c
> > > > +.macro smlal1 a, b, c
> > > >          smlal           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro smlsl1 a b c
> > > > +.macro smlsl1 a, b, c
> > > >          smlsl           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro idct_col4_top y1 y2 y3 y4 i l
> > > > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > > >          smull\i         v7.4S,  \y3\().\l, z2
> > > >          smull\i         v16.4S, \y3\().\l, z6
> > > >          smull\i         v17.4S, \y2\().\l, z1
> > > > @@ -91,7 +91,7 @@ endconst
> > > >          smlsl\i         v6.4S,  \y4\().\l, z5
> > > >  .endm
> > > >
> > > > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > > > +.macro idct_row4_neon y1, y2, y3, y4, pass
> > > >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> > > >          movi            v23.4S, #1<<2, lsl #8
> > > >          orr             v5.16B, \y1\().16B, \y2\().16B
> > > > @@ -153,7 +153,7 @@ endconst
> > > >          trn2            \y4\().4S, v17.4S, v19.4S
> > > >  .endm
> > > >
> > > > -.macro declare_idct_col4_neon i l
> > > > +.macro declare_idct_col4_neon i, l
> > > >  function idct_col4_neon\i
> > > >          dup             v23.4H, z4c
> > > >  .if \i == 1
> > >
> > > Sounds sane, but shouldn't we be doing this for all instances of
> > > multiple arguments macros without commas?
> > >
> > >
> > > Sure, I may have missed some. I will work again on this patch on Tuesday
> > > as I will have access to an apple machine (and hopefully fix the build
> > > without gas-preprocessor).
> > >
> > > Sorry for the delay,
> > > Matthieu
> > >
> > >
> > Updated patch attached:
> >   * add missing commas to separate macro arguments
> >   * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form
> > being interpreted as a hexadecimal value, i.e. 4/8).
> 
> > From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001
> > From: Matthieu Bouron <matthieu.bouron at gmail.com>
> > Date: Fri, 28 Apr 2017 21:58:55 +0200
> > Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without
> >  gas-preprocessor
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Separates macro arguments with commas and passes .4H/.8H as macro
> > arguments instead of 4H/8H (the latter form being interpreted as a
> > hexadecimal value).
> > 
> > Fixes ticket #6324.
> > 
> > Suggested-by: Martin Storsjö <martin at martin.st>
> > ---
> >  libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------
> >  1 file changed, 37 insertions(+), 37 deletions(-)
> > 
> > diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
> > index 52273420f9..92987985d2 100644
> > --- a/libavcodec/aarch64/simple_idct_neon.S
> > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > @@ -61,37 +61,37 @@ endconst
> >          br              x10
> >  .endm
> >  
> > -.macro smull1 a b c
> > +.macro smull1 a, b, c
> >          smull           \a, \b, \c
> >  .endm
> >  
> > -.macro smlal1 a b c
> > +.macro smlal1 a, b, c
> >          smlal           \a, \b, \c
> >  .endm
> >  
> > -.macro smlsl1 a b c
> > +.macro smlsl1 a, b, c
> >          smlsl           \a, \b, \c
> >  .endm
> >  
> > -.macro idct_col4_top y1 y2 y3 y4 i l
> > -        smull\i         v7.4S,  \y3\().\l, z2
> > -        smull\i         v16.4S, \y3\().\l, z6
> > -        smull\i         v17.4S, \y2\().\l, z1
> > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > +        smull\i         v7.4S,  \y3\l, z2
> > +        smull\i         v16.4S, \y3\l, z6
> > +        smull\i         v17.4S, \y2\l, z1
> >          add             v19.4S, v23.4S, v7.4S
> > -        smull\i         v18.4S, \y2\().\l, z3
> > +        smull\i         v18.4S, \y2\l, z3
> >          add             v20.4S, v23.4S, v16.4S
> > -        smull\i         v5.4S,  \y2\().\l, z5
> > +        smull\i         v5.4S,  \y2\l, z5
> >          sub             v21.4S, v23.4S, v16.4S
> > -        smull\i         v6.4S,  \y2\().\l, z7
> > +        smull\i         v6.4S,  \y2\l, z7
> >          sub             v22.4S, v23.4S, v7.4S
> >  
> > -        smlal\i         v17.4S, \y4\().\l, z3
> > -        smlsl\i         v18.4S, \y4\().\l, z7
> > -        smlsl\i         v5.4S,  \y4\().\l, z1
> > -        smlsl\i         v6.4S,  \y4\().\l, z5
> > +        smlal\i         v17.4S, \y4\l, z3
> > +        smlsl\i         v18.4S, \y4\l, z7
> > +        smlsl\i         v5.4S,  \y4\l, z1
> > +        smlsl\i         v6.4S,  \y4\l, z5
> >  .endm
> >  
> > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > +.macro idct_row4_neon y1, y2, y3, y4, pass
> >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> >          movi            v23.4S, #1<<2, lsl #8
> >          orr             v5.16B, \y1\().16B, \y2\().16B
> > @@ -101,7 +101,7 @@ endconst
> >          mov             x3, v5.D[1]
> >          smlal           v23.4S, \y1\().4H, z4
> >  
> > -        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
> > +        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
> >  
> >          cmp             x3, #0
> >          beq             \pass\()f
> > @@ -153,7 +153,7 @@ endconst
> >          trn2            \y4\().4S, v17.4S, v19.4S
> >  .endm
> >  
> > -.macro declare_idct_col4_neon i l
> > +.macro declare_idct_col4_neon i, l
> >  function idct_col4_neon\i
> >          dup             v23.4H, z4c
> >  .if \i == 1
> > @@ -164,14 +164,14 @@ function idct_col4_neon\i
> >  .endif
> >          smull           v23.4S, v23.4H, z4
> >  
> > -        idct_col4_top   v24 v25 v26 v27 \i \l
> > +        idct_col4_top   v24, v25, v26, v27, \i, \l
> >  
> >          mov             x4, v28.D[\i - 1]
> >          mov             x5, v29.D[\i - 1]
> >          cmp             x4, #0
> >          beq             1f
> >  
> > -        smull\i         v7.4S,  v28.\l, z4
> > +        smull\i         v7.4S,  v28\l,  z4
> >          add             v19.4S, v19.4S, v7.4S
> >          sub             v20.4S, v20.4S, v7.4S
> >          sub             v21.4S, v21.4S, v7.4S
> > @@ -181,17 +181,17 @@ function idct_col4_neon\i
> >          cmp             x5, #0
> >          beq             2f
> >  
> > -        smlal\i         v17.4S, v29.\l, z5
> > -        smlsl\i         v18.4S, v29.\l, z1
> > -        smlal\i         v5.4S,  v29.\l, z7
> > -        smlal\i         v6.4S,  v29.\l, z3
> > +        smlal\i         v17.4S, v29\l, z5
> > +        smlsl\i         v18.4S, v29\l, z1
> > +        smlal\i         v5.4S,  v29\l, z7
> > +        smlal\i         v6.4S,  v29\l, z3
> >  
> >  2:      mov             x5, v31.D[\i - 1]
> >          cmp             x4, #0
> >          beq             3f
> >  
> > -        smull\i         v7.4S,  v30.\l, z6
> > -        smull\i         v16.4S, v30.\l, z2
> > +        smull\i         v7.4S,  v30\l, z6
> > +        smull\i         v16.4S, v30\l, z2
> >          add             v19.4S, v19.4S, v7.4S
> >          sub             v22.4S, v22.4S, v7.4S
> >          sub             v20.4S, v20.4S, v16.4S
> > @@ -200,10 +200,10 @@ function idct_col4_neon\i
> >  3:      cmp             x5, #0
> >          beq             4f
> >  
> > -        smlal\i         v17.4S, v31.\l, z7
> > -        smlsl\i         v18.4S, v31.\l, z5
> > -        smlal\i         v5.4S,  v31.\l, z3
> > -        smlsl\i         v6.4S,  v31.\l, z1
> > +        smlal\i         v17.4S, v31\l, z7
> > +        smlsl\i         v18.4S, v31\l, z5
> > +        smlal\i         v5.4S,  v31\l, z3
> > +        smlsl\i         v6.4S,  v31\l, z1
> >  
> >  4:      addhn           v7.4H, v19.4S, v17.4S
> >          addhn2          v7.8H, v20.4S, v18.4S
> > @@ -219,14 +219,14 @@ function idct_col4_neon\i
> >  endfunc
> >  .endm
> >  
> > -declare_idct_col4_neon 1 4H
> > -declare_idct_col4_neon 2 8H
> > +declare_idct_col4_neon 1, .4H
> > +declare_idct_col4_neon 2, .8H
> >  
> >  function ff_simple_idct_put_neon, export=1
> >          idct_start      x2
> >  
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          bl              idct_col4_neon1
> >  
> >          sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
> > @@ -263,8 +263,8 @@ endfunc
> >  function ff_simple_idct_add_neon, export=1
> >          idct_start      x2
> >  
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          bl              idct_col4_neon1
> >  
> >          sshr            v1.8H, V7.8H, #COL_SHIFT-16
> > @@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
> >          idct_start      x0
> >  
> >          mov             x2,  x0
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          add             x2, x2, #-128
> >          bl              idct_col4_neon1
> >  
> > -- 
> > 2.12.0
> > 
> 
> If there is no objection, I will push the patch tomorrow.

Patch applied.


More information about the ffmpeg-devel mailing list