/* Copyright (c) Mark Harmstone 2020 * * This file is part of WinBtrfs. * * WinBtrfs is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public Licence as published by * the Free Software Foundation, either version 3 of the Licence, or * (at your option) any later version. * * WinBtrfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public Licence for more details. * * You should have received a copy of the GNU Lesser General Public Licence * along with WinBtrfs. If not, see . */ #include #ifdef __x86_64__ .code64 /* void do_xor_sse2(uint8_t* buf1, uint8_t* buf2, uint32_t len); */ PUBLIC do_xor_sse2 do_xor_sse2: /* rcx = buf1 * rdx = buf2 * r8d = len * rax = tmp1 * r9 = tmp2 * xmm0 = tmp3 * xmm1 = tmp4 */ mov rax, rcx and rax, 15 cmp rax, 0 jne stragglers2 mov rax, rdx and rax, 15 cmp rax, 0 jne stragglers2 do_xor_sse2_loop: cmp r8d, 16 jl stragglers2 movdqa xmm0, [rcx] movdqa xmm1, [rdx] pxor xmm0, xmm1 movdqa [rcx], xmm0 add rcx, 16 add rdx, 16 sub r8d, 16 jmp do_xor_sse2_loop stragglers2: cmp r8d, 8 jl stragglers mov rax, [rcx] mov r9, [rdx] xor rax, r9 mov [rcx], rax add rcx, 8 add rdx, 8 sub r8d, 8 jmp stragglers2 stragglers: cmp r8d, 0 je do_xor_sse2_end mov al, [rcx] mov r9b, [rdx] xor al, r9b mov [rcx], al inc rcx inc rdx dec r8d jmp stragglers do_xor_sse2_end: ret /* void do_xor_avx2(uint8_t* buf1, uint8_t* buf2, uint32_t len); */ PUBLIC do_xor_avx2 do_xor_avx2: /* rcx = buf1 * rdx = buf2 * r8d = len * rax = tmp1 * r9 = tmp2 * xmm0 = tmp3 * xmm1 = tmp4 */ mov rax, rcx and rax, 31 cmp rax, 0 jne stragglers4 mov rax, rdx and rax, 31 cmp rax, 0 jne stragglers4 do_xor_avx2_loop: cmp r8d, 32 jl stragglers4 vmovdqa ymm0, YMMWORD PTR[rcx] vmovdqa ymm1, YMMWORD PTR[rdx] vpxor ymm0, ymm0, ymm1 vmovdqa YMMWORD PTR[rcx], ymm0 add rcx, 32 add rdx, 32 sub r8d, 32 jmp do_xor_avx2_loop stragglers4: cmp r8d, 8 jl stragglers3 mov rax, [rcx] mov r9, [rdx] xor rax, r9 mov [rcx], rax add rcx, 8 add rdx, 8 sub r8d, 8 jmp stragglers4 stragglers3: cmp r8d, 0 je do_xor_avx2_end mov al, [rcx] mov r9b, [rdx] xor al, r9b mov [rcx], al inc rcx inc rdx dec r8d jmp stragglers3 do_xor_avx2_end: ret END #else .code /* void __stdcall do_xor_sse2(uint8_t* buf1, uint8_t* buf2, uint32_t len); */ PUBLIC _do_xor_sse2@12 _do_xor_sse2@12: /* edi = buf1 * edx = buf2 * esi = len * eax = tmp1 * ecx = tmp2 * xmm0 = tmp3 * xmm1 = tmp4 */ push ebp mov ebp, esp push esi push edi mov edi, [ebp+8] mov edx, [ebp+12] mov esi, [ebp+16] mov eax, edi and eax, 15 cmp eax, 0 jne stragglers2 mov eax, edx and eax, 15 cmp eax, 0 jne stragglers2 do_xor_sse2_loop: cmp esi, 16 jl stragglers2 movdqa xmm0, [edi] movdqa xmm1, [edx] pxor xmm0, xmm1 movdqa [edi], xmm0 add edi, 16 add edx, 16 sub esi, 16 jmp do_xor_sse2_loop stragglers2: cmp esi, 4 jl stragglers mov eax, [edi] mov ecx, [edx] xor eax, ecx mov [edi], eax add edi, 4 add edx, 4 sub esi, 4 jmp stragglers2 stragglers: cmp esi, 0 je do_xor_sse2_end mov al, [edi] mov cl, [edx] xor al, cl mov [edi], al inc edi inc edx dec esi jmp stragglers do_xor_sse2_end: pop edi pop esi pop ebp ret 12 /* void __stdcall do_xor_avx2(uint8_t* buf1, uint8_t* buf2, uint32_t len); */ PUBLIC _do_xor_avx2@12 _do_xor_avx2@12: /* edi = buf1 * edx = buf2 * esi = len * eax = tmp1 * ecx = tmp2 * xmm0 = tmp3 * xmm1 = tmp4 */ push ebp mov ebp, esp push esi push edi mov edi, [ebp+8] mov edx, [ebp+12] mov esi, [ebp+16] mov eax, edi and eax, 31 cmp eax, 0 jne stragglers4 mov eax, edx and eax, 31 cmp eax, 0 jne stragglers4 do_xor_avx2_loop: cmp esi, 32 jl stragglers4 vmovdqa ymm0, YMMWORD PTR[edi] vmovdqa ymm1, YMMWORD PTR[edx] vpxor ymm0, ymm0, ymm1 vmovdqa YMMWORD PTR[edi], ymm0 add edi, 32 add edx, 32 sub esi, 32 jmp do_xor_avx2_loop stragglers4: cmp esi, 4 jl stragglers3 mov eax, [edi] mov ecx, [edx] xor eax, ecx mov [edi], eax add edi, 4 add edx, 4 sub esi, 4 jmp stragglers4 stragglers3: cmp esi, 0 je do_xor_avx2_end mov al, [edi] mov cl, [edx] xor al, cl mov [edi], al inc edi inc edx dec esi jmp stragglers3 do_xor_avx2_end: pop edi pop esi pop ebp ret 12 END #endif