Convert checksum.S to the new ML-compatible syntax. The resulting object file was compared and is identical to trunk (with both GAS and ML).

svn path=/branches/cmake-bringup/; revision=50545
Timo Kreuzer 2011-01-28 20:35:22 +00:00
parent 56a3daafd4
commit ca495a84b1
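
The snippet below is an illustrative sketch only, not part of the commit: it summarizes the syntax mapping applied throughout the diff that follows (AT&T operand order, %-prefixed registers and $-immediates become Intel operand order, bracketed memory operands, HEX() constants and named local labels). The symbol _syntax_demo is hypothetical.

/* Illustrative sketch only -- not part of the commit. */
#include <asm.inc>
.code
PUBLIC _syntax_demo            // GAS:  .globl _syntax_demo
_syntax_demo:
    mov eax, [esp + 4]         // GAS:  movl 4(%esp), %eax
    and eax, HEX(1c)           // GAS:  andl $0x1c, %eax
    jz demo_done               // GAS:  jz 1f   (numeric local label)
    add eax, 1                 // GAS:  addl $1, %eax
demo_done:                     // GAS:  1:
    ret
END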


@@ -24,109 +24,111 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
 /*
 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
  */
-.text
+#include <asm.inc>
+.code
 .align 4
-.globl _csum_partial
+PUBLIC _csum_partial
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 /*
  * Experiments with Ethernet and SLIP connections show that buff
  * is aligned on either a 2-byte or 4-byte boundary. We get at
  * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  * alignment for the unrolled loop.
  */
 _csum_partial:
-pushl %esi
+push esi
-pushl %ebx
+push ebx
-movl 20(%esp),%eax # Function arg: unsigned int sum
+mov eax, [esp + 20] // Function arg: unsigned int sum
-movl 16(%esp),%ecx # Function arg: int len
+mov ecx, [esp + 16] // Function arg: int len
-movl 12(%esp),%esi # Function arg: unsigned char *buff
+mov esi, [esp + 12] // Function arg: unsigned char *buff
-testl $3, %esi # Check alignment.
+test esi, 3 // Check alignment.
-jz 2f # Jump if alignment is ok.
+jz m2 // Jump if alignment is ok.
-testl $1, %esi # Check alignment.
+test esi, 1 // Check alignment.
-jz 10f # Jump if alignment is boundary of 2bytes.
+jz l10 // Jump if alignment is boundary of 2bytes.
 // buf is odd
-dec %ecx
+dec ecx
-jl 8f
+jl l8
-movzbl (%esi), %ebx
+movzx ebx, byte ptr [esi]
-adcl %ebx, %eax
+adc eax, ebx
-roll $8, %eax
+rol eax, 8
-inc %esi
+inc esi
-testl $2, %esi
+test esi, 2
-jz 2f
+jz m2
-10:
+l10:
-subl $2, %ecx # Alignment uses up two bytes.
+sub ecx, 2 // Alignment uses up two bytes.
-jae 1f # Jump if we had at least two bytes.
+jae m1 // Jump if we had at least two bytes.
-addl $2, %ecx # ecx was < 2. Deal with it.
+add ecx, 2 // ecx was < 2. Deal with it.
-jmp 4f
+jmp l4
-1: movw (%esi), %bx
+m1: mov bx, [esi]
-addl $2, %esi
+add esi, 2
-addw %bx, %ax
+add ax, bx
-adcl $0, %eax
+adc eax, 0
-2:
+m2:
-movl %ecx, %edx
+mov edx, ecx
-shrl $5, %ecx
+shr ecx, 5
-jz 2f
+jz l2
-testl %esi, %esi
+test esi, esi
-1: movl (%esi), %ebx
+l1: mov ebx, [esi]
-adcl %ebx, %eax
+adc eax, ebx
-movl 4(%esi), %ebx
+mov ebx, [esi + 4]
-adcl %ebx, %eax
+adc eax, ebx
-movl 8(%esi), %ebx
+mov ebx, [esi + 8]
-adcl %ebx, %eax
+adc eax, ebx
-movl 12(%esi), %ebx
+mov ebx, [esi + 12]
-adcl %ebx, %eax
+adc eax, ebx
-movl 16(%esi), %ebx
+mov ebx, [esi + 16]
-adcl %ebx, %eax
+adc eax, ebx
-movl 20(%esi), %ebx
+mov ebx, [esi + 20]
-adcl %ebx, %eax
+adc eax, ebx
-movl 24(%esi), %ebx
+mov ebx, [esi + 24]
-adcl %ebx, %eax
+adc eax, ebx
-movl 28(%esi), %ebx
+mov ebx, [esi + 28]
-adcl %ebx, %eax
+adc eax, ebx
-lea 32(%esi), %esi
+lea esi, [esi + 32]
-dec %ecx
+dec ecx
-jne 1b
+jne l1
-adcl $0, %eax
+adc eax, 0
-2: movl %edx, %ecx
+l2: mov ecx, edx
-andl $0x1c, %edx
+and edx, HEX(1c)
-je 4f
+je l4
-shrl $2, %edx # This clears CF
+shr edx, 2 // This clears CF
-3: adcl (%esi), %eax
+l3: adc eax, [esi]
-lea 4(%esi), %esi
+lea esi, [esi + 4]
-dec %edx
+dec edx
-jne 3b
+jne l3
-adcl $0, %eax
+adc eax, 0
-4: andl $3, %ecx
+l4: and ecx, 3
-jz 7f
+jz l7
-cmpl $2, %ecx
+cmp ecx, 2
-jb 5f
+jb l5
-movw (%esi),%cx
+mov cx, [esi]
-leal 2(%esi),%esi
+lea esi, [esi + 2]
-je 6f
+je l6
-shll $16,%ecx
+shl ecx, 16
-5: movb (%esi),%cl
+l5: mov cl, [esi]
-6: addl %ecx,%eax
+l6: add eax, ecx
-adcl $0, %eax
+adc eax, 0
-7:
+l7:
-testl $1, 12(%esp)
+test dword ptr [esp + 12], 1
-jz 8f
+jz l8
-roll $8, %eax
+rol eax, 8
-8:
+l8:
-popl %ebx
+pop ebx
-popl %esi
+pop esi
 ret
 #else
@@ -134,116 +136,118 @@ _csum_partial:
 /* Version for PentiumII/PPro */
 csum_partial:
-pushl %esi
+push esi
-pushl %ebx
+push ebx
-movl 20(%esp),%eax # Function arg: unsigned int sum
+mov eax, [esp + 20] # Function arg: unsigned int sum
-movl 16(%esp),%ecx # Function arg: int len
+mov ecx, [esp + 16] # Function arg: int len
-movl 12(%esp),%esi # Function arg: const unsigned char *buf
+mov esi, [esp + 12] # Function arg: const unsigned char *buf
-testl $3, %esi
+test esi, 3
-jnz 25f
+jnz l25f
-10:
+l10:
-movl %ecx, %edx
+mov edx, ecx
-movl %ecx, %ebx
+mov ebx, ecx
-andl $0x7c, %ebx
+and ebx, HEX(7c)
-shrl $7, %ecx
+shr ecx, 7
-addl %ebx,%esi
+add esi, ebx
-shrl $2, %ebx
+shr ebx, 2
-negl %ebx
+neg ebx
-lea 45f(%ebx,%ebx,2), %ebx
+lea ebx, l45[ebx + ebx * 2]
-testl %esi, %esi
+test esi, esi
-jmp *%ebx
+jmp dword ptr [ebx]
-# Handle 2-byte-aligned regions
+// Handle 2-byte-aligned regions
-20: addw (%esi), %ax
+l20: add ax, [esi]
-lea 2(%esi), %esi
+lea esi, [esi + 2]
-adcl $0, %eax
+adc eax, 0
-jmp 10b
+jmp l10b
-25:
+l25:
-testl $1, %esi
+test esi, 1
-jz 30f
+jz l30f
-# buf is odd
+// buf is odd
-dec %ecx
+dec ecx
-jl 90f
+jl l90
-movzbl (%esi), %ebx
+movzb ebx, [esi]
-addl %ebx, %eax
+add eax, ebx
-adcl $0, %eax
+adc eax, 0
-roll $8, %eax
+rol eax, 8
-inc %esi
+inc esi
-testl $2, %esi
+test esi, 2
-jz 10b
+jz l10b
-30: subl $2, %ecx
+l30: sub ecx, 2
-ja 20b
+ja l20
-je 32f
+je l32
-addl $2, %ecx
+add ecx, 2
-jz 80f
+jz l80
-movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
+movzb ebx, [esi] // csumming 1 byte, 2-aligned
-addl %ebx, %eax
+add eax, ebx
-adcl $0, %eax
+adc eax, 0
-jmp 80f
+jmp l80
-32:
+l32:
-addw (%esi), %ax # csumming 2 bytes, 2-aligned
+add ax, [esi] // csumming 2 bytes, 2-aligned
-adcl $0, %eax
+adc eax, 0
-jmp 80f
+jmp l80
-40:
+l40:
-addl -128(%esi), %eax
+add eax, [esi -128]
-adcl -124(%esi), %eax
+adc eax, [esi -124]
-adcl -120(%esi), %eax
+adc eax, [esi -120]
-adcl -116(%esi), %eax
+adc eax, [esi -116]
-adcl -112(%esi), %eax
+adc eax, [esi -112]
-adcl -108(%esi), %eax
+adc eax, [esi -108]
-adcl -104(%esi), %eax
+adc eax, [esi -104]
-adcl -100(%esi), %eax
+adc eax, [esi -100]
-adcl -96(%esi), %eax
+adc eax, [esi -96]
-adcl -92(%esi), %eax
+adc eax, [esi -92]
-adcl -88(%esi), %eax
+adc eax, [esi -88]
-adcl -84(%esi), %eax
+adc eax, [esi -84]
-adcl -80(%esi), %eax
+adc eax, [esi -80]
-adcl -76(%esi), %eax
+adc eax, [esi -76]
-adcl -72(%esi), %eax
+adc eax, [esi -72]
-adcl -68(%esi), %eax
+adc eax, [esi -68]
-adcl -64(%esi), %eax
+adc eax, [esi -64]
-adcl -60(%esi), %eax
+adc eax, [esi -60]
-adcl -56(%esi), %eax
+adc eax, [esi -56]
-adcl -52(%esi), %eax
+adc eax, [esi -52]
-adcl -48(%esi), %eax
+adc eax, [esi -48]
-adcl -44(%esi), %eax
+adc eax, [esi -44]
-adcl -40(%esi), %eax
+adc eax, [esi -40]
-adcl -36(%esi), %eax
+adc eax, [esi -36]
-adcl -32(%esi), %eax
+adc eax, [esi -32]
-adcl -28(%esi), %eax
+adc eax, [esi -28]
-adcl -24(%esi), %eax
+adc eax, [esi -24]
-adcl -20(%esi), %eax
+adc eax, [esi -20]
-adcl -16(%esi), %eax
+adc eax, [esi -16]
-adcl -12(%esi), %eax
+adc eax, [esi -12]
-adcl -8(%esi), %eax
+adc eax, [esi -8]
-adcl -4(%esi), %eax
+adc eax, [esi -4]
-45:
+l45:
-lea 128(%esi), %esi
+lea esi, [esi + 128]
-adcl $0, %eax
+adc eax, 0
-dec %ecx
+dec ecx
-jge 40b
+jge l40
-movl %edx, %ecx
+mov ecx, edx
-50: andl $3, %ecx
+l50: and ecx, 3
-jz 80f
+jz l80
-# Handle the last 1-3 bytes without jumping
+// Handle the last 1-3 bytes without jumping
-notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
+not ecx // 1->2, 2->1, 3->0, higher bits are masked
-movl $0xffffff,%ebx # by the shll and shrl instructions
+mov ebx, HEX(ffffff) // by the shll and shrl instructions
-shll $3,%ecx
+shl ecx, 3
-shrl %cl,%ebx
+shr ebx, cl
-andl -128(%esi),%ebx # esi is 4-aligned so should be ok
+and ebx, [esi -128] // esi is 4-aligned so should be ok
-addl %ebx,%eax
+add eax, ebx
-adcl $0,%eax
+adc eax, 0
-80:
+l80:
-testl $1, 12(%esp)
+test dword ptr [esp + 12], 1
-jz 90f
+jz l90
-roll $8, %eax
+rol eax, 8
-90:
+l90:
-popl %ebx
+pop ebx
-popl %esi
+pop esi
 ret
 #endif
+END
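
The sketch below is illustrative only and not part of the commit: a straight-line, word-at-a-time version of the partial checksum in the new ML syntax, to show what the unrolled routine above computes. It adds 16-bit words with end-around carry and folds a trailing odd byte into the low half, so it yields the same checksum after the usual 16-bit fold, though not necessarily the same raw 32-bit accumulator as the dword-based loop. The symbol _csum_partial_simple is hypothetical, len is assumed non-negative, and the alignment/odd-start rotation handling of the real code is deliberately omitted.

/* Illustrative sketch only -- not part of the commit. */
#include <asm.inc>
.code
PUBLIC _csum_partial_simple
_csum_partial_simple:
    push esi
    mov esi, [esp + 8]        // unsigned char *buff
    mov ecx, [esp + 12]       // int len (assumed >= 0)
    mov eax, [esp + 16]       // unsigned int sum (initial value)
    shr ecx, 1                // number of whole 16-bit words
    jz check_tail             // fewer than 2 bytes in total
word_loop:
    movzx edx, word ptr [esi] // fetch next 16-bit word
    add eax, edx              // accumulate ...
    adc eax, 0                // ... with end-around carry
    add esi, 2
    dec ecx
    jnz word_loop
check_tail:
    test dword ptr [esp + 12], 1
    jz sum_done               // even length, nothing left over
    movzx edx, byte ptr [esi] // trailing odd byte goes into the low half
    add eax, edx
    adc eax, 0
sum_done:
    pop esi
    ret
END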