[FFmpeg-devel] [PATCH] M68K: Optimized MUL64/MULH/MULL functions for 68060

ami_stuff ami_stuff
Mon Aug 3 11:12:12 CEST 2009


> >> >     :"d2", "d3", "d4", "d5");
> >> 
> >> Avoid using hardcoded registers, and prefer explicitly declared temp
> >> variables.
> >
> > Hmm, I don't know how to do it
> 
> int t1, t2, t3, t4;
> asm("..." : "=&d"(t1), "=&d"(t2), "=&d"(t3), "=&d"(t4));
> 
> > and what code GCC will generate after this change.
> 
> Try and see.

You mean something like this?

inline int64_t MUL64(int a, int b)
{
    int t1, t2, t3, t4;
    union { uint64_t x; unsigned hl[2]; } x;
    __asm__(
    "move.l %0, %5      \n\t"
    "move.l %0, %4      \n\t"
    "bge.b  0f          \n\t"
    "neg.l  %0          \n\t"
    "neg.l  %4          \n\t"
    "0:                 \n\t"
    "eor.l  %1, %5      \n\t"
    "move.l %1, %3      \n\t"
    "bge.b  1f          \n\t"
    "neg.l  %1          \n\t"
    "neg.l  %3          \n\t"
    "1:                 \n\t"
    "move.w #16, %5     \n\t"
    "move.l %0, %2      \n\t"
    "mulu.w %1,%0       \n\t"
    "lsr.l  %5, %3      \n\t"
    "lsr.l  %5, %4      \n\t"
    "mulu.w %3, %2      \n\t"
    "mulu.w %4, %1      \n\t"
    "mulu.w %4, %3      \n\t"
    "move.l %2, %4      \n\t"
    "lsr.l  %5, %2      \n\t"
    "add.w  %1, %4      \n\t"
    "addx.l %2, %3      \n\t"
    "lsl.l  %5, %4      \n\t"
    "lsr.l  %5, %1      \n\t"
    "add.l  %4, %0      \n\t"
    "addx.l %3, %1      \n\t"
    "tst.l  %5          \n\t"
    "bpl.b  2f          \n\t"
    "neg.l  %0          \n\t"
    "negx.l %1          \n\t"
    "2:                 \n\t"
    :"=&d"(x.hl[1]), "=&d"(x.hl[0]), "=&d"(t1), "=&d"(t2), "=&d"(t3), "=&d"(t4)
    :"0"(a), "1"(b));
    return x.x;

| _MUL64: m68k assembly listing for the C MUL64(int a, int b) quoted above.
| Signed 32x32 -> 64 bit multiply assembled from four 16x16 mulu.w products.
| Inline-asm operand to register mapping visible in this listing:
|   %0 = d1 (a, then low 32 bits of the result)
|   %1 = d0 (b, then high 32 bits of the result)
|   %2 = d2, %3 = d3, %4 = d4 (scratch)
|   %5 = d5 (bit 31 = sign of a^b, low word = shift count 16)
| The result is left in d0 (high half) and d1 (low half).
#NO_APP
	.text
	.even
	.globl	_MUL64
_MUL64:
	movem.l #15360,-(sp)	| mask 0x3C00: save d2-d5 (16 bytes)
	move.l 20(sp),d1	| d1 = a (16 saved bytes + 4-byte return address)
	move.l 24(sp),d0	| d0 = b
#APP
	move.l d1, d5	| d5 = a, will carry the result sign
	move.l d1, d4	| d4 = a; sets flags for the branch
	bge.b  0f	| a >= 0: no negation needed
	neg.l  d1	| d1 = |a|
	neg.l  d4	| d4 = |a|
	0:
	eor.l  d0, d5	| d5 bit 31 = sign(a) ^ sign(b)
	move.l d0, d3	| d3 = b; sets flags for the branch
	bge.b  1f	| b >= 0: no negation needed
	neg.l  d0	| d0 = |b|
	neg.l  d3	| d3 = |b|
	1:
	move.w #16, d5	| low word = 16, upper word still holds the sign bit
	move.l d1, d2	| d2 = |a|
	mulu.w d0,d1	| d1 = al * bl
	lsr.l  d5, d3	| d3 = bh
	lsr.l  d5, d4	| d4 = ah
	mulu.w d3, d2	| d2 = al * bh
	mulu.w d4, d0	| d0 = bl * ah
	mulu.w d4, d3	| d3 = bh * ah
	move.l d2, d4	| d4 = al * bh
	lsr.l  d5, d2	| d2 = (al * bh) >> 16
	add.w  d0, d4	| d4.w = low16(al*bh) + low16(bl*ah), X = carry
	addx.l d2, d3	| d3 = ah*bh + ((al*bh) >> 16) + X
	lsl.l  d5, d4	| d4 = middle-sum low 16 bits << 16
	lsr.l  d5, d0	| d0 = (bl * ah) >> 16
	add.l  d4, d1	| d1 = low 32 bits of |a|*|b|, X = carry
	addx.l d3, d0	| d0 = high 32 bits of |a|*|b|
	tst.l  d5	| N = sign(a) ^ sign(b)
	bpl.b  2f	| equal signs: keep the positive result
	neg.l  d1	| negate the 64-bit value: low half first
	negx.l d0	| then the high half with the borrow
	2:

#NO_APP
	movem.l (sp)+,#60	| mask 0x3C: restore d2-d5
	rts


If so, I will modify MULH the same way.




More information about the ffmpeg-devel mailing list