171 lines
2.4 KiB
ArmAsm
171 lines
2.4 KiB
ArmAsm
#define BDNZ BC 16,0,
|
|
TEXT memmove(SB), $0
|
|
BR move
|
|
|
|
TEXT memcpy(SB), $0
|
|
move:
|
|
|
|
/*
|
|
* performance:
|
|
* (tba)
|
|
*/
|
|
|
|
MOVW R3, s1+0(FP)
|
|
MOVW n+8(FP), R9 /* R9 is count */
|
|
MOVW R3, R10 /* R10 is to-pointer */
|
|
CMP R9, $0
|
|
BEQ ret
|
|
BLT trap
|
|
MOVW s2+4(FP), R11 /* R11 is from-pointer */
|
|
|
|
/*
|
|
* if no more than 16 bytes, just use one lsw/stsw
|
|
*/
|
|
CMP R9, $16
|
|
BLE fout
|
|
|
|
ADD R9,R11, R13 /* R13 is end from-pointer */
|
|
ADD R9,R10, R12 /* R12 is end to-pointer */
|
|
|
|
/*
|
|
* easiest test is copy backwards if
|
|
* destination string has higher mem address
|
|
*/
|
|
CMPU R10, R11
|
|
BGT back
|
|
|
|
/*
|
|
* test if both pointers
|
|
* are similarly word aligned
|
|
*/
|
|
XOR R10,R11, R7
|
|
ANDCC $3,R7
|
|
BNE fbad
|
|
|
|
/*
|
|
* move a few bytes to align pointers
|
|
*/
|
|
ANDCC $3,R10,R7
|
|
BEQ f2
|
|
SUBC R7, $4, R7
|
|
SUB R7, R9
|
|
MOVW R7, XER
|
|
LSW (R11), R16
|
|
ADD R7, R11
|
|
STSW R16, (R10)
|
|
ADD R7, R10
|
|
|
|
/*
|
|
* turn R14 into doubleword count
|
|
* copy 16 bytes at a time while there's room.
|
|
*/
|
|
f2:
|
|
SRAWCC $4, R9, R14
|
|
BLE fout
|
|
MOVW R14, CTR
|
|
SUB $4, R11
|
|
SUB $4, R10
|
|
f3:
|
|
MOVWU 4(R11), R16
|
|
MOVWU R16, 4(R10)
|
|
MOVWU 4(R11), R17
|
|
MOVWU R17, 4(R10)
|
|
MOVWU 4(R11), R16
|
|
MOVWU R16, 4(R10)
|
|
MOVWU 4(R11), R17
|
|
MOVWU R17, 4(R10)
|
|
BDNZ f3
|
|
RLWNMCC $0, R9, $15, R9 /* residue */
|
|
BEQ ret
|
|
ADD $4, R11
|
|
ADD $4, R10
|
|
|
|
/*
|
|
* move up to 16 bytes through R16 .. R19; aligned and unaligned
|
|
*/
|
|
fout:
|
|
MOVW R9, XER
|
|
LSW (R11), R16
|
|
STSW R16, (R10)
|
|
BR ret
|
|
|
|
/*
|
|
* loop for unaligned copy, then copy up to 15 remaining bytes
|
|
*/
|
|
fbad:
|
|
SRAWCC $4, R9, R14
|
|
BLE f6
|
|
MOVW R14, CTR
|
|
f5:
|
|
LSW (R11), $16, R16
|
|
ADD $16, R11
|
|
STSW R16, $16, (R10)
|
|
ADD $16, R10
|
|
BDNZ f5
|
|
RLWNMCC $0, R9, $15, R9 /* residue */
|
|
BEQ ret
|
|
f6:
|
|
MOVW R9, XER
|
|
LSW (R11), R16
|
|
STSW R16, (R10)
|
|
BR ret
|
|
|
|
/*
|
|
* whole thing repeated for backwards
|
|
*/
|
|
back:
|
|
CMP R9, $4
|
|
BLT bout
|
|
|
|
XOR R12,R13, R7
|
|
ANDCC $3,R7
|
|
BNE bout
|
|
b1:
|
|
ANDCC $3,R13, R7
|
|
BEQ b2
|
|
MOVBZU -1(R13), R16
|
|
MOVBZU R16, -1(R12)
|
|
SUB $1, R9
|
|
BR b1
|
|
b2:
|
|
SRAWCC $4, R9, R14
|
|
BLE b4
|
|
MOVW R14, CTR
|
|
b3:
|
|
MOVWU -4(R13), R16
|
|
MOVWU R16, -4(R12)
|
|
MOVWU -4(R13), R17
|
|
MOVWU R17, -4(R12)
|
|
MOVWU -4(R13), R16
|
|
MOVWU R16, -4(R12)
|
|
MOVWU -4(R13), R17
|
|
MOVWU R17, -4(R12)
|
|
BDNZ b3
|
|
RLWNMCC $0, R9, $15, R9 /* residue */
|
|
BEQ ret
|
|
b4:
|
|
SRAWCC $2, R9, R14
|
|
BLE bout
|
|
MOVW R14, CTR
|
|
b5:
|
|
MOVWU -4(R13), R16
|
|
MOVWU R16, -4(R12)
|
|
BDNZ b5
|
|
RLWNMCC $0, R9, $3, R9 /* residue */
|
|
BEQ ret
|
|
|
|
bout:
|
|
CMPU R13, R11
|
|
BLE ret
|
|
MOVBZU -1(R13), R16
|
|
MOVBZU R16, -1(R12)
|
|
BR bout
|
|
|
|
trap:
|
|
MOVW $0, R0
|
|
MOVW 0(R0), R0
|
|
|
|
ret:
|
|
MOVW s1+0(FP), R3
|
|
RETURN
|