[Ffmpeg-devel] [patch] move av_xiphlacing in avutil
Jindrich Makovicka
makovick
Thu Nov 24 21:38:46 CET 2005
Michael Niedermayer wrote:
> Hi
>
> On Wed, Nov 23, 2005 at 07:52:29AM +0100, Jindrich Makovicka wrote:
>
>>Michael Niedermayer wrote:
>>
>>>hmm, IMHO if the code is small and generic it should be ok (a single crc
>>>calculation function for all 8,16 and 32bit, and no hardcoded tables, but
>>>tables generated with some init_crc() function)
>>>unless there are objections of course ...
>>
>>This should allow arbitrary 8- to 32-bit big- and little-endian CRCs. The code
>>is currently quite a lot slower than a hardcoded table: about 30% slower
>>for a 16-bit CRC, and slightly better for 8 and 32 bits.
>
>
> could you post the generated asm code (gcc -S -O3 -march...) of the slower
> code and of the current fast code
CFLAGS="-S -O3 -march=athlon-4 -mtune=athlon-4"
uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);
# uint16_t crc16_block(uint8_t *data, uint32_t num_bytes)
# 32-bit x86, GAS/AT&T syntax (op src, dst). Both arguments are read from the
# stack: 8(%ebp) = data, 12(%ebp) = num_bytes.
# Table-driven CRC in shift-left form, one input byte per iteration, starting
# from crc = 0 and using the external table crc_lut of 16-bit entries:
#     crc = (crc << 8) ^ crc_lut[(crc >> 8) ^ data[i]]
# Register roles: %edi = data, %esi = num_bytes, %ebx = i, %ecx = crc (only the
# low 16 bits are ever read back, via %ch and %cx), %eax/%edx = scratch.
# Returns the 16-bit crc zero-extended in %eax; returns 0 when num_bytes == 0.
# %edi, %esi and %ebx are saved on entry and restored before returning.
crc16_block:
pushl %ebp
xorl %eax, %eax	# return value 0, used as-is if num_bytes == 0
movl %esp, %ebp
pushl %edi
movl 8(%ebp), %edi	# %edi = data
pushl %esi
movl 12(%ebp), %esi	# %esi = num_bytes
pushl %ebx
testl %esi, %esi
je .L13	# nothing to process: return 0
xorl %ebx, %ebx	# i = 0
xorl %ecx, %ecx	# crc = 0
.p2align 4,,7
.L14:
# Loop body: runs once per input byte until i == num_bytes.
movzbl (%edi,%ebx), %eax	# %eax = data[i]
movzbl %ch, %edx	# %edx = bits 8..15 of crc (its high byte), taken before the shift
incl %ebx	# i++
sall $8, %ecx	# crc <<= 8; bits above 15 are never read back
xorl %edx, %eax	# table index = data[i] ^ old high byte (0..255)
xorw crc_lut(%eax,%eax), %cx	# low 16 bits of crc ^= 16-bit entry at crc_lut + 2*index
cmpl %ebx, %esi
jne .L14
movzwl %cx, %eax	# return value = low 16 bits of crc, zero-extended
.L13:
popl %ebx
popl %esi
popl %edi
leave
ret
uint32_t av_crc(AVCRC *ctx, uint32_t start_crc, uint8_t *buffer, size_t length);
The loop at .L61 is the one relevant for crc16.
# uint32_t av_crc(AVCRC *ctx, uint32_t start_crc, uint8_t *buffer, size_t length)
# 32-bit x86, GAS/AT&T syntax (op src, dst). Arguments are read from the stack:
# 8(%ebp) = ctx, 12(%ebp) = start_crc, 16(%ebp) = buffer, 20(%ebp) = length.
#
# ctx layout as accessed here (field names are not visible in this listing):
#   0(ctx)    non-zero selects the shift-right loop (.L47), zero selects .L48
#             NOTE(review): presumably the "le" flag -- confirm against AVCRC
#   4(ctx)    compared against 8; minus 8 it is the shift count used in .L48
#             NOTE(review): presumably the CRC width in bits -- confirm
#   8(ctx)    AND mask applied to start_crc on entry and to the result on exit
#   12(ctx)   start of a table of 32-bit entries indexed by a byte value
#
# Register roles: %esi = ctx, %ebx = buffer cursor, %edi = length,
#                 %edx = running crc, %eax = scratch / table index.
# Locals: -16(%ebp) = shift count, -20(%ebp) = mask, -24(%ebp) = byte counter
#         (the counter lives in memory only in the .L48 loop).
# Three per-byte loops, chosen once before any data is read:
#   .L42  4(ctx) <= 8:                  crc = table[(crc ^ byte) & 0xff]
#   .L47  4(ctx) > 8, 0(ctx) != 0:      crc = (crc >> 8) ^ table[(crc ^ byte) & 0xff]
#   .L48  4(ctx) > 8, 0(ctx) == 0:      crc = (crc << 8) ^ table[((crc >> shift) ^ byte) & 0xff]
# Returns (crc & mask) in %eax. %edi, %esi, %ebx are saved and restored.
av_crc:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $12, %esp	# room for the three locals at -16/-20/-24(%ebp)
movl 8(%ebp), %esi	# %esi = ctx
movl 16(%ebp), %ebx	# %ebx = buffer
movl 20(%ebp), %edi	# %edi = length
movl 8(%esi), %eax	# %eax = mask word at ctx+8
movl %eax, %edx
movl %eax, -20(%ebp)	# keep the mask for the final AND
movl 4(%esi), %eax	# %eax = word at ctx+4 (see header)
andl 12(%ebp), %edx	# crc = start_crc & mask
cmpl $8, %eax
jg .L60	# signed compare: value > 8 takes the wider paths
xorl %ecx, %ecx	# byte counter = 0
testl %edi, %edi
je .L43	# length == 0: just mask and return
.p2align 4,,7
.L42:
# Loop for 4(ctx) <= 8: the table entry replaces the crc outright.
movzbl (%ebx), %eax	# %eax = next input byte
incl %ecx
incl %ebx
xorl %edx, %eax	# byte ^ crc
cmpl %ecx, %edi	# flags consumed by the jne below; the two moves leave them intact
movzbl %al, %eax	# index = low 8 bits
movl 12(%esi,%eax,4), %edx	# crc = table[index]
jne .L42
.p2align 4,,7
.L43:
# Common exit, also reached from .L60/.L47/.L61 when their work is done.
andl -20(%ebp), %edx	# crc &= mask
addl $12, %esp
popl %ebx
popl %esi
popl %edi
leave
movl %edx, %eax	# return value
ret
.p2align 4,,7
.L60:
movl (%esi), %ecx	# word at ctx+0 selects between the two wide loops
testl %ecx, %ecx
je .L61	# zero: shift-left loop
xorl %ecx, %ecx	# byte counter = 0
testl %edi, %edi
je .L43	# length == 0
.p2align 4,,7
.L47:
# Shift-right loop, unrolled twice; the counter is tested after each byte.
movzbl (%ebx), %eax	# %eax = next input byte
incl %ecx
incl %ebx
xorl %edx, %eax	# byte ^ crc
shrl $8, %edx	# crc >>= 8
movzbl %al, %eax	# index = low 8 bits
xorl 12(%esi,%eax,4), %edx	# crc ^= table[index]
cmpl %ecx, %edi
je .L43	# done after an odd number of bytes
movzbl (%ebx), %eax	# second unrolled copy of the same step
incl %ecx
incl %ebx
xorl %edx, %eax
shrl $8, %edx
movzbl %al, %eax
xorl 12(%esi,%eax,4), %edx
cmpl %ecx, %edi
jne .L47
jmp .L43
.p2align 4,,7
.L61:
testl %edi, %edi
je .L43	# length == 0
subl $8, %eax	# %eax still holds the word from ctx+4; shift = that value - 8
movl $0, -24(%ebp)	# byte counter = 0, kept on the stack
movl %eax, -16(%ebp)	# shift count, kept on the stack
.p2align 4,,7
.L48:
# Shift-left loop. The shift count is reloaded from the stack and the byte
# counter is incremented and compared in memory on every iteration.
# NOTE(review): this stack traffic may contribute to the 16-bit slowdown
# discussed in the thread -- not established by this listing alone.
movzbl -16(%ebp), %ecx	# %cl = shift count (low byte of the stored value)
movl %edx, %eax
sall $8, %edx	# crc <<= 8
shrl %cl, %eax	# old crc >> shift
xorb (%ebx), %al	# low byte ^= next input byte
incl -24(%ebp)	# byte counter++
incl %ebx
movzbl %al, %eax	# index = low 8 bits
xorl 12(%esi,%eax,4), %edx	# crc ^= table[index]
cmpl -24(%ebp), %edi
jne .L48
# Second copy of the exit sequence, same steps as at .L43.
andl -20(%ebp), %edx	# crc &= mask
addl $12, %esp
popl %ebx
popl %esi
popl %edi
leave
movl %edx, %eax	# return value
ret
>
> hmm, can't the table and start_crc be modified so that the ctx->le case is always usable?
>
I am not sure, I'll try to look at other implementations.
--
Jindrich Makovicka
More information about the ffmpeg-devel
mailing list