libsec: AES-NI support for amd64

Add assembler versions of aes_encrypt/aes_decrypt and the key
setup using the AES-NI instruction set. This makes aes_encrypt and
aes_decrypt into function pointers which get initialized by
the first call to setupAESstate().

Note that the expanded round key words are *NOT* stored in
big-endian order as with the portable implementation. For that reason
the AESstate.ekey and AESstate.dkey fields have been changed to
void*, forcing an error when someone is accessing the round key
words. One offender was aesXCBmac, which doesn't appear to be
used and the code looks horrible, so it has been deleted.

The AES-NI implementation is for amd64 only as it requires the
kernel to save/restore the FPU state across syscalls and
pagefaults.
This commit is contained in:
cinap_lenrek 2017-11-12 23:15:15 +01:00
parent 4f27f6a04f
commit 3356e0e731
11 changed files with 1453 additions and 1241 deletions

View file

@ -32,27 +32,25 @@ struct AESstate
ulong offset;
int rounds;
int keybytes;
void *ekey; /* expanded encryption round key */
void *dkey; /* expanded decryption round key */
uchar key[AESmaxkey]; /* unexpanded key */
ulong ekey[4*(AESmaxrounds + 1)]; /* encryption key */
ulong dkey[4*(AESmaxrounds + 1)]; /* decryption key */
uchar ivec[AESbsize]; /* initialization vector */
uchar mackey[3 * AESbsize]; /* 3 XCBC mac 96 keys */
uchar storage[512]; /* storage for expanded keys */
};
/* block ciphers */
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
void aesCBCencrypt(uchar *p, int len, AESstate *s);
void aesCBCdecrypt(uchar *p, int len, AESstate *s);
void aesCFBencrypt(uchar *p, int len, AESstate *s);
void aesCFBdecrypt(uchar *p, int len, AESstate *s);
void aesOFBencrypt(uchar *p, int len, AESstate *s);
void setupAESXCBCstate(AESstate *s);
uchar* aesXCBCmac(uchar *p, int len, AESstate *s);
typedef struct AESGCMstate AESGCMstate;
struct AESGCMstate
{

View file

@ -24,27 +24,25 @@ struct AESstate
ulong offset;
int rounds;
int keybytes;
void *ekey; /* expanded encryption round key */
void *dkey; /* expanded decryption round key */
uchar key[AESmaxkey]; /* unexpanded key */
ulong ekey[4*(AESmaxrounds + 1)]; /* encryption key */
ulong dkey[4*(AESmaxrounds + 1)]; /* decryption key */
uchar ivec[AESbsize]; /* initialization vector */
uchar mackey[3 * AESbsize]; /* 3 XCBC mac 96 keys */
uchar storage[512]; /* storage for expanded keys */
};
/* block ciphers */
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
void aesCBCencrypt(uchar *p, int len, AESstate *s);
void aesCBCdecrypt(uchar *p, int len, AESstate *s);
void aesCFBencrypt(uchar *p, int len, AESstate *s);
void aesCFBdecrypt(uchar *p, int len, AESstate *s);
void aesOFBencrypt(uchar *p, int len, AESstate *s);
void setupAESXCBCstate(AESstate *s);
uchar* aesXCBCmac(uchar *p, int len, AESstate *s);
typedef struct AESGCMstate AESGCMstate;
struct AESGCMstate
{

View file

@ -7,7 +7,6 @@ aesCFBencrypt, \
aesCFBdecrypt, \
aesOFBencrypt, \
aes_xts_encrypt, aes_xts_decrypt, \
setupAESXCBCstate, aesXCBCmac, \
setupAESGCMstate, \
aesgcm_setiv, aesgcm_encrypt, aesgcm_decrypt \
- advanced encryption standard (rijndael)
@ -22,14 +21,15 @@ aesgcm_setiv, aesgcm_encrypt, aesgcm_decrypt \
.PP
.in +0.5i
.ti -0.5i
.B
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
.PP
.B
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16])
.PP
.B
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16])
.PP
.B
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec)
.PP
.B
void aesCBCencrypt(uchar *p, int len, AESstate *s)
@ -53,12 +53,6 @@ void aes_xts_encrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar
void aes_xts_decrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len)
.PP
.B
void setupAESXCBCstate(AESstate *s)
.PP
.B
void aesXCBCmac(uchar *p, int len, AESstate *s)
.PP
.B
void setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)
.PP
.B
@ -78,7 +72,7 @@ and
are the block ciphers, corresponding to
.IR des (2)'s
.IR block_cipher .
.IR AesCBCencrypt ,
.I AesCBCencrypt
and
.I aesCBCdecrypt
implement cipher-block-chaining encryption.
@ -94,10 +88,20 @@ and
implement the XTS-AES tweakable block cipher, per IEEE 1619-2017 (see bugs below).
.IR SetupAESstate
is used to initialize the state of the above encryption modes.
.I SetupAESXCBCstate
The expanded roundkey parameters
.I rk
and
.I aesXCBCmac
implement AES XCBC message authentication, per RFC 3566.
.I Nr
of
.I aes_encrypt
and
.I aes_decrypt
are returned in
.I AESstate.ekey
and
.I AESstate.dkey
with the corresponding number of rounds in
.IR AESstate.rounds .
.IR SetupAESGCMstate ,
.IR aesgcm_setiv ,
.I aesgcm_encrypt
@ -116,7 +120,8 @@ arguments.
Aesgcm_decrypt returns zero when authentication and decryption were successful and
non-zero otherwise.
All ciphering is performed in place.
.I Keybytes
The byte keysize
.I nkey
should be 16, 24, or 32.
The initialization vector
.I ivec
@ -144,14 +149,6 @@ in
.br
.B http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
.SH BUGS
The functions
.IR aes_encrypt ,
.IR aes_decrypt ,
.IR setupAESXCBCstate ,
and
.IR aesXCBCmac
have not yet been verified by running test vectors through them.
.PP
Because of the way that non-multiple-of-16 buffers are handled,
.I aesCBCdecrypt
must be fed buffers of the same size as the

View file

@ -5,6 +5,7 @@ LIB=/$objtype/lib/ape/libsec.a
FILES=\
md5block\
sha1block\
aesni\
HFILES=/sys/include/ape/libsec.h

View file

@ -4,7 +4,7 @@ APE=/sys/src/ape
LIB=/$objtype/lib/ape/libsec.a
CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
aes.c aesCBC.c aesCFB.c aesOFB.c aesXCBmac.c aes_gcm.c \
aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
blowfish.c \
hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
sha2_64.c sha2_128.c sha2block64.c sha2block128.c\

View file

@ -0,0 +1,408 @@
/*
 * Hand-encoded AES-NI instructions, emitted as raw opcode bytes.
 * r1 and r2 are XMM register numbers (0-7) packed into the ModRM byte
 * 0xC0 | r2<<3 | r1: r1 is the source (r/m field) and r2 the destination
 * (reg field), i.e. the same source-first operand order as the rest of
 * this file.  AESOP emits 66 0F 38 <o> <ModRM>.
 */
#define AESOP(o,r1,r2) \
BYTE $0x66; \
BYTE $0x0F; \
BYTE $0x38; \
BYTE $(o); \
BYTE $(0xC0 | r2<<3 | r1)
/* the individual AES round instructions, selected by the opcode byte o */
#define AESIMC(r1,r2) AESOP(0xDB,r1,r2)
#define AESENC(r1,r2) AESOP(0xDC,r1,r2)
#define AESENCLAST(r1,r2) AESOP(0xDD,r1,r2)
#define AESDEC(r1,r2) AESOP(0xDE,r1,r2)
#define AESDECLAST(r1,r2) AESOP(0xDF,r1,r2)
/*
 * AESKEYGENASSIST emits 66 0F 3A DF <ModRM> <i>: i is the immediate byte
 * (used below as the key schedule round constant), r1 the source and
 * r2 the destination XMM register number.
 */
#define AESKEYGENASSIST(i,r1,r2) \
BYTE $0x66; \
BYTE $0x0F; \
BYTE $0x3A; \
BYTE $0xDF; \
BYTE $(0xC0 | r2<<3 | r1); \
BYTE $(i)
/*
 * aesni_init: probe for AES-NI and install the accelerated routines.
 * Executes CPUID with AX=1 and tests bit 25 of CX (the AES feature flag).
 * If the bit is clear, returns 0 in AX and leaves the aes_encrypt and
 * aes_decrypt function pointers untouched.  Otherwise stores the
 * addresses of AESencrypt and AESdecrypt into them and returns the
 * address of AESsetup in AX.
 */
TEXT aesni_init(SB), 0, $0
MOVL $1, AX
CPUID
/* preset the return value to 0; the ANDL below sets the flags for JZ */
XORL AX, AX
ANDL $(1<<25), CX
JZ _ret
/* override aes function pointers */
MOVQ $AESencrypt<>(SB), AX
MOVQ AX, aes_encrypt(SB)
MOVQ $AESdecrypt<>(SB), AX
MOVQ AX, aes_decrypt(SB)
/* return setup function pointer */
MOVQ $AESsetup<>(SB), AX
_ret:
RET
/*
 * AESencrypt(rk, Nr, pt, ct): encrypt the 16-byte block at pt into ct.
 * rk arrives in RARG and is read with aligned MOVO loads, so the round
 * keys are assumed to be 16-byte aligned; pt and ct are accessed with
 * MOVOU and may be unaligned.
 * Nr selects the entry point: Nr < 12 runs 10 rounds, Nr == 12 runs 12
 * rounds and anything larger runs 14 rounds.
 */
TEXT AESencrypt<>(SB), 0, $0
MOVL Nr+8(FP), CX
MOVQ pt+16(FP), SI
MOVQ ct+24(FP), DI
/* initial key addition: X0 = rk[0] ^ pt; RARG advances to rk[1] */
MOVO (RARG), X0
MOVOU (SI), X7
ADDQ $16, RARG
PXOR X7, X0
CMPL CX, $12
JLT erounds10
JEQ erounds12
/* 14 rounds only: two extra rounds, then fall through */
erounds14:
MOVO 0(RARG), X1
MOVO 16(RARG), X2
ADDQ $32, RARG
AESENC(1, 0)
AESENC(2, 0)
/* 12 and 14 rounds: two extra rounds, then fall through */
erounds12:
MOVO 0(RARG), X3
MOVO 16(RARG), X4
ADDQ $32, RARG
AESENC(3, 0)
AESENC(4, 0)
/*
 * common tail: the last ten round keys at RARG.  Nine AESENC rounds
 * followed by AESENCLAST; X1-X3 are reloaded with the final three keys
 * once their first use has been issued.
 */
erounds10:
MOVO 0(RARG), X1
MOVO 16(RARG), X2
MOVO 32(RARG), X3
MOVO 48(RARG), X4
MOVO 64(RARG), X5
MOVO 80(RARG), X6
MOVO 96(RARG), X7
AESENC(1, 0)
MOVO 112(RARG), X1
AESENC(2, 0)
MOVO 128(RARG), X2
AESENC(3, 0)
MOVO 144(RARG), X3
AESENC(4, 0)
AESENC(5, 0)
AESENC(6, 0)
AESENC(7, 0)
AESENC(1, 0)
AESENC(2, 0)
AESENCLAST(3, 0)
/* store the ciphertext block */
MOVOU X0, (DI)
RET
/*
 * AESdecrypt(rk, Nr, ct, pt): decrypt the 16-byte block at ct into pt.
 * rk arrives in RARG and must be the decryption key schedule (the one
 * AESsetup writes to drk); it is read front to back with aligned MOVO
 * loads, so it is assumed to be 16-byte aligned.  ct and pt are accessed
 * with MOVOU and may be unaligned.
 * Nr selects the entry point: Nr < 12 runs 10 rounds, Nr == 12 runs 12
 * rounds and anything larger runs 14 rounds.
 */
TEXT AESdecrypt<>(SB), 0, $0
MOVL Nr+8(FP), CX
MOVQ ct+16(FP), SI
MOVQ pt+24(FP), DI
/* initial key addition: X0 = rk[0] ^ ct; RARG advances to rk[1] */
MOVO (RARG), X0
MOVOU (SI), X7
ADDQ $16, RARG
PXOR X7, X0
CMPL CX, $12
JLT drounds10
JEQ drounds12
/* 14 rounds only: two extra rounds, then fall through */
drounds14:
MOVO 0(RARG), X1
MOVO 16(RARG), X2
ADDQ $32, RARG
AESDEC(1, 0)
AESDEC(2, 0)
/* 12 and 14 rounds: two extra rounds, then fall through */
drounds12:
MOVO 0(RARG), X3
MOVO 16(RARG), X4
ADDQ $32, RARG
AESDEC(3, 0)
AESDEC(4, 0)
/*
 * common tail: the last ten round keys at RARG.  Nine AESDEC rounds
 * followed by AESDECLAST; X1-X3 are reloaded with the final three keys
 * once their first use has been issued.
 */
drounds10:
MOVO 0(RARG), X1
MOVO 16(RARG), X2
MOVO 32(RARG), X3
MOVO 48(RARG), X4
MOVO 64(RARG), X5
MOVO 80(RARG), X6
MOVO 96(RARG), X7
AESDEC(1, 0)
MOVO 112(RARG), X1
AESDEC(2, 0)
MOVO 128(RARG), X2
AESDEC(3, 0)
MOVO 144(RARG), X3
AESDEC(4, 0)
AESDEC(5, 0)
AESDEC(6, 0)
AESDEC(7, 0)
AESDEC(1, 0)
AESDEC(2, 0)
AESDECLAST(3, 0)
/* store the plaintext block */
MOVOU X0, (DI)
RET
/*
 * AESsetup(erk, drk, key, nkey): expand key into the encryption round
 * keys at erk and the matching decryption round keys at drk.
 * nkey is the key length in bytes; 16, 24 and 32 are accepted.
 * Returns the number of rounds (10, 12 or 14) in AX, or 0 for any other
 * nkey, in which case erk and drk are left untouched.
 * erk and drk are accessed with aligned MOVO moves, so both are assumed
 * to be 16-byte aligned.
 */
TEXT AESsetup<>(SB), 0, $16
/* spill erk (passed in RARG) to its argument slot; it is reloaded at dsetup */
MOVQ RARG, erk+0(FP)
MOVQ key+16(FP), DX
MOVL nkey+24(FP), BX
/* key becomes the stack argument the setupEnc* helpers read as key+8(FP) */
MOVQ DX, 8(SP)
CMPL BX, $32
JEQ esetup256
CMPL BX, $24
JEQ esetup192
CMPL BX, $16
JEQ esetup128
/* unsupported key size: return 0 */
XORL AX, AX
RET
/* each helper fills erk (still in RARG) and returns the round count Nr in AX */
esetup256:
CALL setupEnc256<>(SB)
JMP dsetup
esetup192:
CALL setupEnc192<>(SB)
JMP dsetup
esetup128:
CALL setupEnc128<>(SB)
/*
 * derive the decryption schedule: drk is erk in reverse order, with
 * AESIMC applied to every round key except the first and the last.
 */
dsetup:
MOVQ erk+0(FP), SI
MOVQ drk+8(FP), DI
/* BX = 16*Nr, SI = &erk[Nr] (the last encryption round key) */
MOVL AX, BX
SHLL $4, BX
ADDQ BX, SI
/* drk[0] = erk[Nr], copied unchanged */
MOVO (SI), X0
MOVO X0, (DI)
/* drk[i] = AESIMC(erk[Nr-i]) for i = 1..4 */
MOVO -16(SI), X1
MOVO -32(SI), X2
MOVO -48(SI), X3
MOVO -64(SI), X4
AESIMC(1, 1)
AESIMC(2, 2)
AESIMC(3, 3)
AESIMC(4, 4)
MOVO X1, 16(DI)
MOVO X2, 32(DI)
MOVO X3, 48(DI)
MOVO X4, 64(DI)
/* i = 5..8 */
MOVO -80(SI), X1
MOVO -96(SI), X2
MOVO -112(SI), X3
MOVO -128(SI), X4
AESIMC(1, 1)
AESIMC(2, 2)
AESIMC(3, 3)
AESIMC(4, 4)
MOVO X1, 80(DI)
MOVO X2, 96(DI)
MOVO X3, 112(DI)
MOVO X4, 128(DI)
/* i = 9 */
MOVO -144(SI), X1
AESIMC(1, 1)
MOVO X1, 144(DI)
/* a 10-round schedule has no further inner keys */
CMPL AX, $10
JEQ dsetupend
/* i = 10, 11 (12 and 14 rounds) */
MOVO -160(SI), X1
MOVO -176(SI), X2
AESIMC(1, 1)
AESIMC(2, 2)
MOVO X1, 160(DI)
MOVO X2, 176(DI)
CMPL AX, $12
JEQ dsetupend
/* i = 12, 13 (14 rounds only) */
MOVO -192(SI), X1
MOVO -208(SI), X2
AESIMC(1, 1)
AESIMC(2, 2)
MOVO X1, 192(DI)
MOVO X2, 208(DI)
/* drk[Nr] = erk[0], copied unchanged; AX still holds Nr for the return */
dsetupend:
SUBQ BX, SI
ADDQ BX, DI
MOVO (SI), X0
MOVO X0, (DI)
RET
/*
 * setupEnc128: expand a 16-byte key into 11 round keys (176 bytes)
 * stored at RARG.  The key pointer is read from key+8(FP) and the key
 * may be unaligned; RARG is written with aligned MOVO stores.
 * Each step runs AESKEYGENASSIST on the previous round key (X1) with the
 * next round constant, leaving its result in X0, and lets rk128 turn X1
 * into the next round key, which is then stored.
 * Returns 10 (the round count) in AX.
 */
TEXT setupEnc128<>(SB), 0, $0
MOVQ key+8(FP), SI
/* round key 0 is the cipher key itself */
MOVOU (SI), X1
MOVO X1, (RARG)
AESKEYGENASSIST(0x01, 1, 0)
CALL rk128<>(SB)
MOVO X1, 16(RARG)
AESKEYGENASSIST(0x02, 1, 0)
CALL rk128<>(SB)
MOVO X1, 32(RARG)
AESKEYGENASSIST(0x04, 1, 0)
CALL rk128<>(SB)
MOVO X1, 48(RARG)
AESKEYGENASSIST(0x08, 1, 0)
CALL rk128<>(SB)
MOVO X1, 64(RARG)
AESKEYGENASSIST(0x10, 1, 0)
CALL rk128<>(SB)
MOVO X1, 80(RARG)
AESKEYGENASSIST(0x20, 1, 0)
CALL rk128<>(SB)
MOVO X1, 96(RARG)
AESKEYGENASSIST(0x40, 1, 0)
CALL rk128<>(SB)
MOVO X1, 112(RARG)
AESKEYGENASSIST(0x80, 1, 0)
CALL rk128<>(SB)
MOVO X1, 128(RARG)
AESKEYGENASSIST(0x1b, 1, 0)
CALL rk128<>(SB)
MOVO X1, 144(RARG)
AESKEYGENASSIST(0x36, 1, 0)
CALL rk128<>(SB)
MOVO X1, 160(RARG)
MOVL $10, AX
RET
/*
 * rk128: one AES-128 key expansion step.
 * In:  X0 = AESKEYGENASSIST output for X1, X1 = previous round key.
 * Out: X1 = next round key.  Clobbers X0 and X2.
 */
TEXT rk128<>(SB), 0, $0
/* broadcast dword 3 of the AESKEYGENASSIST result to all four lanes */
PSHUFL $0xff, X0, X0
/* X1 ^= X1<<32 ^ X1<<64 ^ X1<<96, built from 4-byte shifts of the copy in X2 */
MOVO X1, X2
PSLLO $4, X2
PXOR X2, X1
PSLLO $4, X2
PXOR X2, X1
PSLLO $4, X2
PXOR X2, X1
/* fold in the broadcast keygen word */
PXOR X0, X1
RET
/*
 * setupEnc192: expand a 24-byte key into 13 round keys (208 bytes)
 * stored at RARG.  The key pointer is read from key+8(FP); RARG is
 * written with aligned MOVO stores.
 * The running state is 192 bits wide: X1 plus the low 64 bits of X2.
 * Every rk192 call produces the next 192 bits, so two calls yield three
 * 128-bit round keys; SHUFPD stitches the halves together.
 * Returns 12 (the round count) in AX.
 * NOTE(review): the second MOVOU loads 16 bytes at key+16, i.e. 32 bytes
 * are read from a 24-byte key -- the caller's buffer must have at least
 * 32 readable bytes; confirm against setupAESstate.
 */
TEXT setupEnc192<>(SB), 0, $0
MOVQ key+8(FP), SI
MOVOU (SI), X1
MOVOU 16(SI), X2
/* round key 0 = first 16 key bytes; X5 keeps the remaining 8 key bytes in its low half */
MOVO X1, (RARG)
MOVO X2, X5
AESKEYGENASSIST(0x01, 2, 0)
CALL rk192<>(SB)
/* round key 1 = low half of the old X2 followed by the low half of the new X1 */
SHUFPD $0, X1, X5
MOVO X5, 16(RARG)
/* round key 2 = high half of the new X1 followed by the low half of the new X2 */
MOVO X1, X6
SHUFPD $1, X2, X6
MOVO X6, 32(RARG)
AESKEYGENASSIST(0x02, 2, 0)
CALL rk192<>(SB)
/* round key 3 = X1; the low half of X2 is carried over in X5 */
MOVO X1, 48(RARG)
MOVO X2, X5
/* the same three-key pattern repeats for round keys 4-6, 7-9 and 10-12 */
AESKEYGENASSIST(0x04, 2, 0)
CALL rk192<>(SB)
SHUFPD $0, X1, X5
MOVO X5, 64(RARG)
MOVO X1, X6
SHUFPD $1, X2, X6
MOVO X6, 80(RARG)
AESKEYGENASSIST(0x08, 2, 0)
CALL rk192<>(SB)
MOVO X1, 96(RARG)
MOVO X2, X5
AESKEYGENASSIST(0x10, 2, 0)
CALL rk192<>(SB)
SHUFPD $0, X1, X5
MOVO X5, 112(RARG)
MOVO X1, X6
SHUFPD $1, X2, X6
MOVO X6, 128(RARG)
AESKEYGENASSIST(0x20, 2, 0)
CALL rk192<>(SB)
MOVO X1, 144(RARG)
MOVO X2, X5
AESKEYGENASSIST(0x40, 2, 0)
CALL rk192<>(SB)
SHUFPD $0, X1, X5
MOVO X5, 160(RARG)
MOVO X1, X6
SHUFPD $1, X2, X6
MOVO X6, 176(RARG)
AESKEYGENASSIST(0x80, 2, 0)
CALL rk192<>(SB)
/* round key 12 = X1; the low half of X2 is not needed any more */
MOVO X1, 192(RARG)
MOVL $12, AX
RET
/*
 * rk192: one AES-192 key expansion step.
 * In:  X0 = AESKEYGENASSIST output for X2, X1 = state words 0-3,
 *      X2 = state words 4-5 in its low 64 bits.
 * Out: X1 and the low 64 bits of X2 hold the next 192 bits of the
 *      schedule.  Clobbers X0 and X4.
 */
TEXT rk192<>(SB), 0, $0
/* broadcast dword 1 of the AESKEYGENASSIST result to all four lanes */
PSHUFL $0x55, X0, X0
/* X1 ^= X1<<32 ^ X1<<64 ^ X1<<96, built from 4-byte shifts of the copy in X4 */
MOVOU X1, X4
PSLLO $4, X4
PXOR X4, X1
PSLLO $4, X4
PXOR X4, X1
PSLLO $4, X4
PXOR X4, X1
PXOR X0, X1
/* continue the chain into X2: broadcast the new last word of X1 */
PSHUFL $0xff, X1, X0
/* X2 ^= X2<<32, then fold in the broadcast word */
MOVOU X2, X4
PSLLO $4, X4
PXOR X4, X2
PXOR X0, X2
RET
/*
 * setupEnc256: expand a 32-byte key into 15 round keys (240 bytes)
 * stored at RARG.  The key pointer is read from key+8(FP) and the key
 * may be unaligned; RARG is written with aligned MOVO stores.
 * X1 and X2 hold the two most recent round keys.  Even round keys are
 * derived by rk256_a (AESKEYGENASSIST on X2 with the next round
 * constant, updating X1), odd ones by rk256_b (AESKEYGENASSIST on X1
 * with immediate 0, updating X2).
 * Returns 14 (the round count) in AX.
 */
TEXT setupEnc256<>(SB), 0, $0
MOVQ key+8(FP), SI
/* round keys 0 and 1 are the two halves of the cipher key */
MOVOU (SI), X1
MOVOU 16(SI), X2
MOVO X1, (RARG)
MOVO X2, 16(RARG)
AESKEYGENASSIST(0x01, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 32(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 48(RARG)
AESKEYGENASSIST(0x02, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 64(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 80(RARG)
AESKEYGENASSIST(0x04, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 96(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 112(RARG)
AESKEYGENASSIST(0x08, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 128(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 144(RARG)
AESKEYGENASSIST(0x10, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 160(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 176(RARG)
AESKEYGENASSIST(0x20, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 192(RARG)
AESKEYGENASSIST(0x00, 1, 0)
CALL rk256_b<>(SB)
MOVO X2, 208(RARG)
/* the final round key (14) needs only the rk256_a half step */
AESKEYGENASSIST(0x40, 2, 0)
CALL rk256_a<>(SB)
MOVO X1, 224(RARG)
MOVL $14, AX
RET
/*
 * rk256_a: AES-256 key expansion, even round keys.
 * In:  X0 = AESKEYGENASSIST output for X2, X1 = round key two steps back.
 * Out: X1 = next even round key.  Clobbers X0 and X4.
 */
TEXT rk256_a<>(SB), 0, $0
/* broadcast dword 3 of the AESKEYGENASSIST result to all four lanes */
PSHUFL $0xff, X0, X0
/* X1 ^= X1<<32 ^ X1<<64 ^ X1<<96, built from 4-byte shifts of the copy in X4 */
MOVO X1, X4
PSLLO $4, X4
PXOR X4, X1
PSLLO $4, X4
PXOR X4, X1
PSLLO $4, X4
PXOR X4, X1
/* fold in the broadcast keygen word */
PXOR X0, X1
RET
/*
 * rk256_b: AES-256 key expansion, odd round keys.
 * In:  X0 = AESKEYGENASSIST output for X1, X2 = round key two steps back.
 * Out: X2 = next odd round key.  Clobbers X0 and X4.
 */
TEXT rk256_b<>(SB), 0, $0
/* broadcast dword 2 of the AESKEYGENASSIST result to all four lanes */
PSHUFL $0xaa, X0, X0
/* X2 ^= X2<<32 ^ X2<<64 ^ X2<<96, built from 4-byte shifts of the copy in X4 */
MOVO X2, X4
PSLLO $4, X4
PXOR X4, X2
PSLLO $4, X4
PXOR X4, X2
PSLLO $4, X4
PXOR X4, X2
/* fold in the broadcast keygen word */
PXOR X0, X2
RET

View file

@ -5,6 +5,7 @@ LIB=/$objtype/lib/libsec.a
FILES=\
md5block\
sha1block\
aesni\
HFILES=/sys/include/libsec.h

File diff suppressed because it is too large Load diff

View file

@ -150,9 +150,10 @@ setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)
{
setupAESstate(s, key, keylen, nil);
memset(s->mackey, 0, AESbsize);
aes_encrypt(s->ekey, s->rounds, s->mackey, s->mackey);
load128(s->mackey, s->H);
memset(s->ivec, 0, AESbsize);
aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
load128(s->ivec, s->H);
memset(s->ivec, 0, AESbsize);
prepareM(s->H, s->M);
if(iv != nil && ivlen > 0)

View file

@ -0,0 +1,5 @@
/*
 * C version of aesni_init: installs nothing and always returns a null
 * pointer, i.e. it never hands back an accelerated key setup routine.
 * NOTE(review): presumably the stand-in for targets that have no
 * assembler aesni_init -- confirm against the mkfiles.
 */
void*
aesni_init(void)
{
	return (void*)0;
}

View file

@ -3,7 +3,7 @@
LIB=/$objtype/lib/libsec.a
CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
aes.c aesCBC.c aesCFB.c aesOFB.c aesXCBmac.c aes_gcm.c \
aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
blowfish.c \
hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
sha2_64.c sha2_128.c sha2block64.c sha2block128.c\