libc: provide arm64 assembly versions for memmove() and memset()

Just a first approximation: uses unaligned 8-byte loads and stores.
MOVP is not yet implemented by the linker. No ZVA cache magic yet.
This commit is contained in:
cinap_lenrek 2019-05-09 02:32:09 +02:00
parent 1046d3e30b
commit 4e7fbabfc9
6 changed files with 158 additions and 0 deletions

View file

@ -0,0 +1,50 @@
/*
 * void *memmove(void *to, void *from, ulong n)
 * void *memcpy(void *to, void *from, ulong n)
 *
 * Copies n bytes from `from` to `to`; overlapping regions are handled
 * by choosing the copy direction.  Returns `to`.
 *
 * In:   R0 = to, from at 8(FP), n (32-bit, zero-extended) at 16(FP)
 * Out:  R0 = to; R0 is never written, so the value passed in is returned
 * Uses: R1 = read cursor, R2 = n, R3/R6 = write cursor and end,
 *       R4 = bound of the 8-byte loop, R5 = data
 *
 * Bulk copy uses 8-byte loads and stores with no alignment fixup;
 * the remaining n%8 bytes are copied one at a time.
 */
TEXT memcpy(SB), $-4
TEXT memmove(SB), $-4
	MOV	from+8(FP), R1
	MOVWU	n+16(FP), R2

	CMP	R0, R1
	BEQ	_done			/* from == to: nothing to copy */
	BLT	_backward		/* from < to: copy from the top down */

_forward:
	MOV	R0, R6			/* walk a scratch cursor; keep R0 == to for the return */
	ADD	R6, R2, R3		/* R3 = to + n, end of destination */
	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
	CBZ	R4, _floop1		/* fewer than 8 bytes: byte loop only */
	ADD	R6, R4, R4		/* R4 = address where the 8-byte loop stops */

_floop8:
	MOV	(R1)8!, R5		/* post-increment both cursors by 8 */
	MOV	R5, (R6)8!
	CMP	R4, R6
	BNE	_floop8

_floop1:
	CMP	R3, R6
	BEQ	_done
	MOVBU	(R1)1!, R5
	MOVBU	R5, (R6)1!
	B	_floop1

_done:
	RETURN

_backward:
	ADD	R2, R1, R1		/* R1 = from + n */
	ADD	R2, R0, R3		/* R3 = to + n; write cursor, moves down to R0 */
	BIC	$7, R2, R4
	CBZ	R4, _bloop1
	SUB	R4, R3, R4		/* R4 = address where the 8-byte loop stops */

_bloop8:
	MOV	-8(R1)!, R5		/* pre-decrement both cursors by 8 */
	MOV	R5, -8(R3)!
	CMP	R4, R3
	BNE	_bloop8

_bloop1:
	CMP	R0, R3
	BEQ	_done
	MOVBU	-1(R1)!, R5
	MOVBU	R5, -1(R3)!
	B	_bloop1

View file

@ -0,0 +1,27 @@
/*
 * void *memset(void *p, int c, ulong n)
 *
 * Stores the low byte of c into the n bytes starting at p.  Returns p.
 *
 * In:   R0 = p, c at 8(FP), n (32-bit, zero-extended) at 16(FP)
 * Out:  R0 = p; R0 is never written, so the value passed in is returned
 * Uses: R1 = fill pattern, R2 = n, R3 = p + n,
 *       R4 = bound of the 8-byte loop, R5 = write cursor
 *
 * Bulk fill uses 8-byte stores with no alignment fixup; the remaining
 * n%8 bytes are stored one at a time.
 */
TEXT memset(SB), $-4
	MOVBU	c+8(FP), R1
	MOVWU	n+16(FP), R2

	MOV	R0, R5			/* walk a scratch cursor; keep R0 == p for the return */
	ADD	R5, R2, R3		/* R3 = p + n, end of the region */
	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
	CBZ	R4, _loop1		/* fewer than 8 bytes: byte loop only */
	ADD	R5, R4, R4		/* R4 = address where the 8-byte loop stops */

	ORR	R1<<8, R1		/* replicate the byte: 1 -> 2 -> 4 -> 8 copies */
	ORR	R1<<16, R1
	ORR	R1<<32, R1

_loop8:
	MOV	R1, (R5)8!		/* post-increment the cursor by 8 */
	CMP	R4, R5
	BNE	_loop8

_loop1:
	CMP	R3, R5
	BEQ	_done
	MOVBU	R1, (R5)1!
	B	_loop1

_done:
	RETURN

View file

@ -6,6 +6,8 @@ OFILES=\
getfcr.$O\
lock.$O\
main9.$O\
memmove.$O\
memset.$O\
notetramp.$O\
setjmp.$O\
tas.$O\

View file

@ -0,0 +1,50 @@
/*
 * void *memmove(void *to, void *from, ulong n)
 * void *memcpy(void *to, void *from, ulong n)
 *
 * Copies n bytes from `from` to `to`; overlapping regions are handled
 * by choosing the copy direction.  Returns `to`.
 *
 * In:   R0 = to, from at 8(FP), n (32-bit, zero-extended) at 16(FP)
 * Out:  R0 = to; R0 is never written, so the value passed in is returned
 * Uses: R1 = read cursor, R2 = n, R3/R6 = write cursor and end,
 *       R4 = bound of the 8-byte loop, R5 = data
 *
 * Bulk copy uses 8-byte loads and stores with no alignment fixup;
 * the remaining n%8 bytes are copied one at a time.
 */
TEXT memcpy(SB), $-4
TEXT memmove(SB), $-4
	MOV	from+8(FP), R1
	MOVWU	n+16(FP), R2

	CMP	R0, R1
	BEQ	_done			/* from == to: nothing to copy */
	BLT	_backward		/* from < to: copy from the top down */

_forward:
	MOV	R0, R6			/* walk a scratch cursor; keep R0 == to for the return */
	ADD	R6, R2, R3		/* R3 = to + n, end of destination */
	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
	CBZ	R4, _floop1		/* fewer than 8 bytes: byte loop only */
	ADD	R6, R4, R4		/* R4 = address where the 8-byte loop stops */

_floop8:
	MOV	(R1)8!, R5		/* post-increment both cursors by 8 */
	MOV	R5, (R6)8!
	CMP	R4, R6
	BNE	_floop8

_floop1:
	CMP	R3, R6
	BEQ	_done
	MOVBU	(R1)1!, R5
	MOVBU	R5, (R6)1!
	B	_floop1

_done:
	RETURN

_backward:
	ADD	R2, R1, R1		/* R1 = from + n */
	ADD	R2, R0, R3		/* R3 = to + n; write cursor, moves down to R0 */
	BIC	$7, R2, R4
	CBZ	R4, _bloop1
	SUB	R4, R3, R4		/* R4 = address where the 8-byte loop stops */

_bloop8:
	MOV	-8(R1)!, R5		/* pre-decrement both cursors by 8 */
	MOV	R5, -8(R3)!
	CMP	R4, R3
	BNE	_bloop8

_bloop1:
	CMP	R0, R3
	BEQ	_done
	MOVBU	-1(R1)!, R5
	MOVBU	R5, -1(R3)!
	B	_bloop1

View file

@ -0,0 +1,27 @@
/*
 * void *memset(void *p, int c, ulong n)
 *
 * Stores the low byte of c into the n bytes starting at p.  Returns p.
 *
 * In:   R0 = p, c at 8(FP), n (32-bit, zero-extended) at 16(FP)
 * Out:  R0 = p; R0 is never written, so the value passed in is returned
 * Uses: R1 = fill pattern, R2 = n, R3 = p + n,
 *       R4 = bound of the 8-byte loop, R5 = write cursor
 *
 * Bulk fill uses 8-byte stores with no alignment fixup; the remaining
 * n%8 bytes are stored one at a time.
 */
TEXT memset(SB), $-4
	MOVBU	c+8(FP), R1
	MOVWU	n+16(FP), R2

	MOV	R0, R5			/* walk a scratch cursor; keep R0 == p for the return */
	ADD	R5, R2, R3		/* R3 = p + n, end of the region */
	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
	CBZ	R4, _loop1		/* fewer than 8 bytes: byte loop only */
	ADD	R5, R4, R4		/* R4 = address where the 8-byte loop stops */

	ORR	R1<<8, R1		/* replicate the byte: 1 -> 2 -> 4 -> 8 copies */
	ORR	R1<<16, R1
	ORR	R1<<32, R1

_loop8:
	MOV	R1, (R5)8!		/* post-increment the cursor by 8 */
	CMP	R4, R5
	BNE	_loop8

_loop1:
	CMP	R3, R5
	BEQ	_done
	MOVBU	R1, (R5)1!
	B	_loop1

_done:
	RETURN

View file

@ -11,6 +11,8 @@ SFILES=\
getfcr.s\
main9.s\
main9p.s\
memmove.s\
memset.s\
setjmp.s\
tas.s\