libsec: AES-NI support for amd64
Add assembler versions of aes_encrypt/aes_decrypt and the key setup using the AES-NI instruction set. This makes aes_encrypt and aes_decrypt into function pointers, which get initialized by the first call to setupAESstate(). Note that the expanded round key words are *NOT* stored in big-endian order as with the portable implementation. For that reason the AESstate.ekey and AESstate.dkey fields have been changed to void*, forcing an error when someone accesses the round key words. One offender was aesXCBmac, which doesn't appear to be used, and the code looks horrible, so it has been deleted. The AES-NI implementation is for amd64 only, as it requires the kernel to save/restore the FPU state across syscalls and page faults.
This commit is contained in:
parent
4f27f6a04f
commit
3356e0e731
11 changed files with 1453 additions and 1241 deletions
|
@ -32,27 +32,25 @@ struct AESstate
|
|||
ulong offset;
|
||||
int rounds;
|
||||
int keybytes;
|
||||
void *ekey; /* expanded encryption round key */
|
||||
void *dkey; /* expanded decryption round key */
|
||||
uchar key[AESmaxkey]; /* unexpanded key */
|
||||
ulong ekey[4*(AESmaxrounds + 1)]; /* encryption key */
|
||||
ulong dkey[4*(AESmaxrounds + 1)]; /* decryption key */
|
||||
uchar ivec[AESbsize]; /* initialization vector */
|
||||
uchar mackey[3 * AESbsize]; /* 3 XCBC mac 96 keys */
|
||||
uchar storage[512]; /* storage for expanded keys */
|
||||
};
|
||||
|
||||
/* block ciphers */
|
||||
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
|
||||
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
|
||||
extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
|
||||
extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
|
||||
|
||||
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
|
||||
|
||||
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
|
||||
void aesCBCencrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCBCdecrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCFBencrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCFBdecrypt(uchar *p, int len, AESstate *s);
|
||||
void aesOFBencrypt(uchar *p, int len, AESstate *s);
|
||||
|
||||
void setupAESXCBCstate(AESstate *s);
|
||||
uchar* aesXCBCmac(uchar *p, int len, AESstate *s);
|
||||
|
||||
typedef struct AESGCMstate AESGCMstate;
|
||||
struct AESGCMstate
|
||||
{
|
||||
|
|
|
@ -24,27 +24,25 @@ struct AESstate
|
|||
ulong offset;
|
||||
int rounds;
|
||||
int keybytes;
|
||||
void *ekey; /* expanded encryption round key */
|
||||
void *dkey; /* expanded decryption round key */
|
||||
uchar key[AESmaxkey]; /* unexpanded key */
|
||||
ulong ekey[4*(AESmaxrounds + 1)]; /* encryption key */
|
||||
ulong dkey[4*(AESmaxrounds + 1)]; /* decryption key */
|
||||
uchar ivec[AESbsize]; /* initialization vector */
|
||||
uchar mackey[3 * AESbsize]; /* 3 XCBC mac 96 keys */
|
||||
uchar storage[512]; /* storage for expanded keys */
|
||||
};
|
||||
|
||||
/* block ciphers */
|
||||
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
|
||||
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
|
||||
extern void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
|
||||
extern void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
|
||||
|
||||
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
|
||||
|
||||
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
|
||||
void aesCBCencrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCBCdecrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCFBencrypt(uchar *p, int len, AESstate *s);
|
||||
void aesCFBdecrypt(uchar *p, int len, AESstate *s);
|
||||
void aesOFBencrypt(uchar *p, int len, AESstate *s);
|
||||
|
||||
void setupAESXCBCstate(AESstate *s);
|
||||
uchar* aesXCBCmac(uchar *p, int len, AESstate *s);
|
||||
|
||||
typedef struct AESGCMstate AESGCMstate;
|
||||
struct AESGCMstate
|
||||
{
|
||||
|
|
|
@ -7,7 +7,6 @@ aesCFBencrypt, \
|
|||
aesCFBdecrypt, \
|
||||
aesOFBencrypt, \
|
||||
aes_xts_encrypt, aes_xts_decrypt, \
|
||||
setupAESXCBCstate, aesXCBCmac, \
|
||||
setupAESGCMstate, \
|
||||
aesgcm_setiv, aesgcm_encrypt, aesgcm_decrypt \
|
||||
- advanced encryption standard (rijndael)
|
||||
|
@ -22,14 +21,15 @@ aesgcm_setiv, aesgcm_encrypt, aesgcm_decrypt \
|
|||
.PP
|
||||
.in +0.5i
|
||||
.ti -0.5i
|
||||
.B
|
||||
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16]);
|
||||
.PP
|
||||
.B
|
||||
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16]);
|
||||
void aes_encrypt(ulong rk[], int Nr, uchar pt[16], uchar ct[16])
|
||||
.PP
|
||||
.B
|
||||
void setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
|
||||
void aes_decrypt(ulong rk[], int Nr, uchar ct[16], uchar pt[16])
|
||||
.PP
|
||||
.B
|
||||
void setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec)
|
||||
.PP
|
||||
.B
|
||||
void aesCBCencrypt(uchar *p, int len, AESstate *s)
|
||||
|
@ -53,12 +53,6 @@ void aes_xts_encrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar
|
|||
void aes_xts_decrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, ulong len)
|
||||
.PP
|
||||
.B
|
||||
void setupAESXCBCstate(AESstate *s)
|
||||
.PP
|
||||
.B
|
||||
void aesXCBCmac(uchar *p, int len, AESstate *s)
|
||||
.PP
|
||||
.B
|
||||
void setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)
|
||||
.PP
|
||||
.B
|
||||
|
@ -78,7 +72,7 @@ and
|
|||
are the block ciphers, corresponding to
|
||||
.IR des (2)'s
|
||||
.IR block_cipher .
|
||||
.IR AesCBCencrypt ,
|
||||
.I AesCBCencrypt
|
||||
and
|
||||
.I aesCBCdecrypt
|
||||
implement cipher-block-chaining encryption.
|
||||
|
@ -94,10 +88,20 @@ and
|
|||
implement the XTS-AES tweakable block cipher, per IEEE 1619-2017 (see bugs below).
|
||||
.IR SetupAESstate
|
||||
is used to initialize the state of the above encryption modes.
|
||||
.I SetupAESXCBCstate
|
||||
The expanded roundkey parameters
|
||||
.I rk
|
||||
and
|
||||
.I aesXCBCmac
|
||||
implement AES XCBC message authentication, per RFC 3566.
|
||||
.I Nr
|
||||
of
|
||||
.I aes_encrypt
|
||||
and
|
||||
.I aes_decrypt
|
||||
are returned in
|
||||
.I AESstate.ekey
|
||||
and
|
||||
.I AESstate.dkey
|
||||
with the corresponding number of rounds in
|
||||
.IR AESstate.rounds .
|
||||
.IR SetupAESGCMstate ,
|
||||
.IR aesgcm_setiv ,
|
||||
.I aesgcm_encrypt
|
||||
|
@ -116,7 +120,8 @@ arguments.
|
|||
Aesgcm_decrypt returns zero when authentication and decryption were successful and
|
||||
non-zero otherwise.
|
||||
All ciphering is performed in place.
|
||||
.I Keybytes
|
||||
The byte keysize
|
||||
.I nkey
|
||||
should be 16, 24, or 32.
|
||||
The initialization vector
|
||||
.I ivec
|
||||
|
@ -144,14 +149,6 @@ in
|
|||
.br
|
||||
.B http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
|
||||
.SH BUGS
|
||||
The functions
|
||||
.IR aes_encrypt ,
|
||||
.IR aes_decrypt ,
|
||||
.IR setupAESXCBCstate ,
|
||||
and
|
||||
.IR aesXCBCmac
|
||||
have not yet been verified by running test vectors through them.
|
||||
.PP
|
||||
Because of the way that non-multiple-of-16 buffers are handled,
|
||||
.I aesCBCdecrypt
|
||||
must be fed buffers of the same size as the
|
||||
|
|
|
@ -5,6 +5,7 @@ LIB=/$objtype/lib/ape/libsec.a
|
|||
FILES=\
|
||||
md5block\
|
||||
sha1block\
|
||||
aesni\
|
||||
|
||||
HFILES=/sys/include/ape/libsec.h
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ APE=/sys/src/ape
|
|||
LIB=/$objtype/lib/ape/libsec.a
|
||||
|
||||
CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
|
||||
aes.c aesCBC.c aesCFB.c aesOFB.c aesXCBmac.c aes_gcm.c \
|
||||
aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
|
||||
blowfish.c \
|
||||
hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
|
||||
sha2_64.c sha2_128.c sha2block64.c sha2block128.c\
|
||||
|
|
408
sys/src/libsec/amd64/aesni.s
Normal file
408
sys/src/libsec/amd64/aesni.s
Normal file
|
@ -0,0 +1,408 @@
|
|||
/*
 * Hand-encoded AES-NI instructions, emitted as raw bytes.
 * AESOP emits 66 0F 38 <o> followed by a register-direct ModRM
 * byte (0xC0 | r2<<3 | r1): r2 lands in the reg field and r1 in
 * the r/m field, so only XMM register numbers 0-7 can be encoded.
 * As used in this file, r1 is the source register (round key)
 * and r2 is the register that is updated.
 */
#define AESOP(o,r1,r2) \
|
||||
BYTE $0x66; \
|
||||
BYTE $0x0F; \
|
||||
BYTE $0x38; \
|
||||
BYTE $(o); \
|
||||
BYTE $(0xC0 | r2<<3 | r1)
|
||||
|
||||
/* the five 66 0F 38 xx operations, selected by the opcode byte */
#define AESIMC(r1,r2) AESOP(0xDB,r1,r2)
|
||||
#define AESENC(r1,r2) AESOP(0xDC,r1,r2)
|
||||
#define AESENCLAST(r1,r2) AESOP(0xDD,r1,r2)
|
||||
#define AESDEC(r1,r2) AESOP(0xDE,r1,r2)
|
||||
#define AESDECLAST(r1,r2) AESOP(0xDF,r1,r2)
|
||||
|
||||
/*
 * AESKEYGENASSIST emits 66 0F 3A DF, the same register-direct
 * ModRM byte as AESOP, and then the immediate byte i (used by
 * the key setup routines as the round constant).
 */
#define AESKEYGENASSIST(i,r1,r2) \
|
||||
BYTE $0x66; \
|
||||
BYTE $0x0F; \
|
||||
BYTE $0x3A; \
|
||||
BYTE $0xDF; \
|
||||
BYTE $(0xC0 | r2<<3 | r1); \
|
||||
BYTE $(i)
|
||||
|
||||
/*
 * aesni_init: probe for AES-NI and install the accelerated routines.
 * Executes CPUID with AX=1 and tests bit 25 of CX (the AES
 * feature flag). If the bit is clear, returns 0 in AX and leaves
 * the function pointers untouched. Otherwise stores the addresses
 * of AESencrypt<> and AESdecrypt<> into aes_encrypt and
 * aes_decrypt and returns the address of AESsetup<> in AX.
 */
TEXT aesni_init(SB), 0, $0
|
||||
MOVL $1, AX /* CPUID leaf 1 */
|
||||
CPUID
|
||||
XORL AX, AX /* default return value: 0 */
|
||||
ANDL $(1<<25), CX /* zero result (ZF set) when the AES bit is clear */
|
||||
JZ _ret
|
||||
|
||||
/* override aes function pointers */
|
||||
MOVQ $AESencrypt<>(SB), AX
|
||||
MOVQ AX, aes_encrypt(SB)
|
||||
MOVQ $AESdecrypt<>(SB), AX
|
||||
MOVQ AX, aes_decrypt(SB)
|
||||
|
||||
/* return setup function pointer */
|
||||
MOVQ $AESsetup<>(SB), AX
|
||||
_ret:
|
||||
RET
|
||||
|
||||
/*
 * AESencrypt<>: encrypt one 16-byte block.
 * Arguments: rk in RARG, Nr at 8(FP), pt at 16(FP), ct at 24(FP).
 * Round keys are read with MOVO (the aligned move, so rk is
 * expected to be 16-byte aligned); pt and ct are accessed with
 * MOVOU. Nr < 12 takes the 10-round path, Nr == 12 runs two
 * extra rounds in front of it, any larger Nr runs four.
 * Overwrites X0-X7, CX, SI, DI and advances RARG.
 */
TEXT AESencrypt<>(SB), 0, $0
|
||||
MOVL Nr+8(FP), CX
|
||||
MOVQ pt+16(FP), SI
|
||||
MOVQ ct+24(FP), DI
|
||||
MOVO (RARG), X0 /* first round key */
|
||||
MOVOU (SI), X7 /* plaintext block */
|
||||
ADDQ $16, RARG
|
||||
PXOR X7, X0 /* X0 = state = pt ^ rk[0] */
|
||||
CMPL CX, $12
|
||||
JLT erounds10
|
||||
JEQ erounds12
|
||||
erounds14:
|
||||
MOVO 0(RARG), X1
|
||||
MOVO 16(RARG), X2
|
||||
ADDQ $32, RARG
|
||||
AESENC(1, 0)
|
||||
AESENC(2, 0)
|
||||
erounds12:
|
||||
MOVO 0(RARG), X3
|
||||
MOVO 16(RARG), X4
|
||||
ADDQ $32, RARG
|
||||
AESENC(3, 0)
|
||||
AESENC(4, 0)
|
||||
erounds10:
|
||||
/* common tail: nine AESENC rounds followed by AESENCLAST */
MOVO 0(RARG), X1
|
||||
MOVO 16(RARG), X2
|
||||
MOVO 32(RARG), X3
|
||||
MOVO 48(RARG), X4
|
||||
MOVO 64(RARG), X5
|
||||
MOVO 80(RARG), X6
|
||||
MOVO 96(RARG), X7
|
||||
AESENC(1, 0)
|
||||
MOVO 112(RARG), X1 /* X1-X3 are reloaded as soon as they have been consumed */
|
||||
AESENC(2, 0)
|
||||
MOVO 128(RARG), X2
|
||||
AESENC(3, 0)
|
||||
MOVO 144(RARG), X3
|
||||
AESENC(4, 0)
|
||||
AESENC(5, 0)
|
||||
AESENC(6, 0)
|
||||
AESENC(7, 0)
|
||||
|
||||
AESENC(1, 0)
|
||||
AESENC(2, 0)
|
||||
AESENCLAST(3, 0)
|
||||
MOVOU X0, (DI) /* store ciphertext */
|
||||
RET
|
||||
|
||||
/*
 * AESdecrypt<>: decrypt one 16-byte block.
 * Arguments: rk in RARG, Nr at 8(FP), ct at 16(FP), pt at 24(FP).
 * Same structure as AESencrypt<> but using AESDEC/AESDECLAST;
 * rk is consumed front to back, so it has to be a decryption
 * key schedule in the layout AESsetup<> writes to drk.
 * Round keys are read with MOVO (aligned move), ct and pt with
 * MOVOU. Overwrites X0-X7, CX, SI, DI and advances RARG.
 */
TEXT AESdecrypt<>(SB), 0, $0
|
||||
MOVL Nr+8(FP), CX
|
||||
MOVQ ct+16(FP), SI
|
||||
MOVQ pt+24(FP), DI
|
||||
MOVO (RARG), X0 /* first round key */
|
||||
MOVOU (SI), X7 /* ciphertext block */
|
||||
ADDQ $16, RARG
|
||||
PXOR X7, X0 /* X0 = state = ct ^ rk[0] */
|
||||
CMPL CX, $12
|
||||
JLT drounds10
|
||||
JEQ drounds12
|
||||
drounds14:
|
||||
MOVO 0(RARG), X1
|
||||
MOVO 16(RARG), X2
|
||||
ADDQ $32, RARG
|
||||
AESDEC(1, 0)
|
||||
AESDEC(2, 0)
|
||||
drounds12:
|
||||
MOVO 0(RARG), X3
|
||||
MOVO 16(RARG), X4
|
||||
ADDQ $32, RARG
|
||||
AESDEC(3, 0)
|
||||
AESDEC(4, 0)
|
||||
drounds10:
|
||||
/* common tail: nine AESDEC rounds followed by AESDECLAST */
MOVO 0(RARG), X1
|
||||
MOVO 16(RARG), X2
|
||||
MOVO 32(RARG), X3
|
||||
MOVO 48(RARG), X4
|
||||
MOVO 64(RARG), X5
|
||||
MOVO 80(RARG), X6
|
||||
MOVO 96(RARG), X7
|
||||
AESDEC(1, 0)
|
||||
MOVO 112(RARG), X1 /* X1-X3 are reloaded as soon as they have been consumed */
|
||||
AESDEC(2, 0)
|
||||
MOVO 128(RARG), X2
|
||||
AESDEC(3, 0)
|
||||
MOVO 144(RARG), X3
|
||||
AESDEC(4, 0)
|
||||
AESDEC(5, 0)
|
||||
AESDEC(6, 0)
|
||||
AESDEC(7, 0)
|
||||
|
||||
AESDEC(1, 0)
|
||||
AESDEC(2, 0)
|
||||
AESDECLAST(3, 0)
|
||||
MOVOU X0, (DI) /* store plaintext */
|
||||
RET
|
||||
|
||||
/*
 * AESsetup<>: expand key into the encryption schedule erk and
 * derive the decryption schedule drk from it.
 * Arguments: erk in RARG, drk at 8(FP), key at 16(FP), nkey at 24(FP).
 * nkey must be 16, 24 or 32; any other value returns 0 in AX
 * without writing either schedule. On success AX holds the
 * round count Nr returned by setupEnc128/192/256<> (10, 12 or 14).
 * The decryption schedule is the encryption schedule reversed:
 * drk[0] = erk[Nr], drk[Nr] = erk[0], and drk[i] = AESIMC(erk[Nr-i])
 * for the round keys in between. Both schedules are accessed
 * with MOVO (aligned move).
 */
TEXT AESsetup<>(SB), 0, $16
|
||||
MOVQ RARG, erk+0(FP) /* save erk in its argument slot across the CALL */
|
||||
MOVQ key+16(FP), DX
|
||||
MOVL nkey+24(FP), BX
|
||||
MOVQ DX, 8(SP) /* key becomes the second argument of setupEnc*<> */
|
||||
CMPL BX, $32
|
||||
JEQ esetup256
|
||||
CMPL BX, $24
|
||||
JEQ esetup192
|
||||
CMPL BX, $16
|
||||
JEQ esetup128
|
||||
XORL AX, AX /* unsupported key size: return 0 rounds */
|
||||
RET
|
||||
esetup256:
|
||||
CALL setupEnc256<>(SB)
|
||||
JMP dsetup
|
||||
esetup192:
|
||||
CALL setupEnc192<>(SB)
|
||||
JMP dsetup
|
||||
esetup128:
|
||||
CALL setupEnc128<>(SB)
|
||||
dsetup:
|
||||
MOVQ erk+0(FP), SI
|
||||
MOVQ drk+8(FP), DI
|
||||
|
||||
MOVL AX, BX
|
||||
SHLL $4, BX /* BX = 16*Nr */
|
||||
ADDQ BX, SI /* SI = address of the last encryption round key */
|
||||
|
||||
MOVO (SI), X0 /* drk[0] = erk[Nr], copied unchanged */
|
||||
MOVO X0, (DI)
|
||||
|
||||
MOVO -16(SI), X1
|
||||
MOVO -32(SI), X2
|
||||
MOVO -48(SI), X3
|
||||
MOVO -64(SI), X4
|
||||
AESIMC(1, 1)
|
||||
AESIMC(2, 2)
|
||||
AESIMC(3, 3)
|
||||
AESIMC(4, 4)
|
||||
MOVO X1, 16(DI)
|
||||
MOVO X2, 32(DI)
|
||||
MOVO X3, 48(DI)
|
||||
MOVO X4, 64(DI)
|
||||
|
||||
MOVO -80(SI), X1
|
||||
MOVO -96(SI), X2
|
||||
MOVO -112(SI), X3
|
||||
MOVO -128(SI), X4
|
||||
AESIMC(1, 1)
|
||||
AESIMC(2, 2)
|
||||
AESIMC(3, 3)
|
||||
AESIMC(4, 4)
|
||||
MOVO X1, 80(DI)
|
||||
MOVO X2, 96(DI)
|
||||
MOVO X3, 112(DI)
|
||||
MOVO X4, 128(DI)
|
||||
|
||||
MOVO -144(SI), X1
|
||||
AESIMC(1, 1)
|
||||
MOVO X1, 144(DI)
|
||||
|
||||
CMPL AX, $10 /* 10 rounds: the nine inner keys are done */
|
||||
JEQ dsetupend
|
||||
|
||||
MOVO -160(SI), X1
|
||||
MOVO -176(SI), X2
|
||||
AESIMC(1, 1)
|
||||
AESIMC(2, 2)
|
||||
MOVO X1, 160(DI)
|
||||
MOVO X2, 176(DI)
|
||||
|
||||
CMPL AX, $12 /* 12 rounds: eleven inner keys are done */
|
||||
JEQ dsetupend
|
||||
|
||||
MOVO -192(SI), X1
|
||||
MOVO -208(SI), X2
|
||||
AESIMC(1, 1)
|
||||
AESIMC(2, 2)
|
||||
MOVO X1, 192(DI)
|
||||
MOVO X2, 208(DI)
|
||||
dsetupend:
|
||||
SUBQ BX, SI /* back to erk[0] */
|
||||
ADDQ BX, DI /* forward to drk[Nr] */
|
||||
MOVO (SI), X0 /* drk[Nr] = erk[0], copied unchanged */
|
||||
MOVO X0, (DI)
|
||||
RET
|
||||
|
||||
/*
 * setupEnc128<>: expand a 16-byte key into 11 round keys.
 * Arguments: destination schedule in RARG, key at 8(FP).
 * The key itself is stored as round key 0; each following step
 * runs AESKEYGENASSIST on X1 with the next round constant into
 * X0, lets rk128<> fold that into X1, and stores X1 at the next
 * 16-byte offset (16..160). Returns 10 in AX.
 * The key is read with MOVOU, the schedule is written with MOVO
 * (aligned move).
 */
TEXT setupEnc128<>(SB), 0, $0
|
||||
MOVQ key+8(FP), SI
|
||||
MOVOU (SI), X1
|
||||
MOVO X1, (RARG)
|
||||
AESKEYGENASSIST(0x01, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 16(RARG)
|
||||
AESKEYGENASSIST(0x02, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 32(RARG)
|
||||
AESKEYGENASSIST(0x04, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 48(RARG)
|
||||
AESKEYGENASSIST(0x08, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 64(RARG)
|
||||
AESKEYGENASSIST(0x10, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 80(RARG)
|
||||
AESKEYGENASSIST(0x20, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 96(RARG)
|
||||
AESKEYGENASSIST(0x40, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 112(RARG)
|
||||
AESKEYGENASSIST(0x80, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 128(RARG)
|
||||
AESKEYGENASSIST(0x1b, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 144(RARG)
|
||||
AESKEYGENASSIST(0x36, 1, 0)
|
||||
CALL rk128<>(SB)
|
||||
MOVO X1, 160(RARG)
|
||||
MOVL $10, AX /* number of rounds */
|
||||
RET
|
||||
/*
 * rk128<>: one AES-128 key expansion step.
 * In: X0 = AESKEYGENASSIST result, X1 = previous round key.
 * Out: X1 = next round key. Overwrites X0 and X2.
 * Broadcasts dword 3 of X0, XORs X1 with copies of itself
 * shifted left by 4, 8 and 12 bytes, then XORs in the broadcast.
 */
TEXT rk128<>(SB), 0, $0
|
||||
PSHUFL $0xff, X0, X0
|
||||
MOVO X1, X2
|
||||
PSLLO $4, X2
|
||||
PXOR X2, X1
|
||||
PSLLO $4, X2
|
||||
PXOR X2, X1
|
||||
PSLLO $4, X2
|
||||
PXOR X2, X1
|
||||
PXOR X0, X1
|
||||
RET
|
||||
|
||||
/*
 * setupEnc192<>: expand a 24-byte key into 13 round keys.
 * Arguments: destination schedule in RARG, key at 8(FP).
 * X1 holds four key words and X2 two more in its low half;
 * rk192<> advances both by one six-word step. The SHUFPD
 * instructions splice halves of X1, X2 and the saved copy in
 * X5/X6 together so that the six-word steps are stored as
 * 16-byte round keys at offsets 0..192. Returns 12 in AX.
 * NOTE: the second MOVOU reads 16 bytes at key+16, i.e. 32
 * bytes of key are read even though only 24 are key material.
 */
TEXT setupEnc192<>(SB), 0, $0
|
||||
MOVQ key+8(FP), SI
|
||||
MOVOU (SI), X1
|
||||
MOVOU 16(SI), X2 /* reads key bytes 16..31 */
|
||||
MOVO X1, (RARG)
|
||||
MOVO X2, X5
|
||||
AESKEYGENASSIST(0x01, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
SHUFPD $0, X1, X5
|
||||
MOVO X5, 16(RARG)
|
||||
MOVO X1, X6
|
||||
SHUFPD $1, X2, X6
|
||||
MOVO X6, 32(RARG)
|
||||
AESKEYGENASSIST(0x02, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
MOVO X1, 48(RARG)
|
||||
MOVO X2, X5
|
||||
AESKEYGENASSIST(0x04, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
SHUFPD $0, X1, X5
|
||||
MOVO X5, 64(RARG)
|
||||
MOVO X1, X6
|
||||
SHUFPD $1, X2, X6
|
||||
MOVO X6, 80(RARG)
|
||||
AESKEYGENASSIST(0x08, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
MOVO X1, 96(RARG)
|
||||
MOVO X2, X5
|
||||
AESKEYGENASSIST(0x10, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
SHUFPD $0, X1, X5
|
||||
MOVO X5, 112(RARG)
|
||||
MOVO X1, X6
|
||||
SHUFPD $1, X2, X6
|
||||
MOVO X6, 128(RARG)
|
||||
AESKEYGENASSIST(0x20, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
MOVO X1, 144(RARG)
|
||||
MOVO X2, X5
|
||||
AESKEYGENASSIST(0x40, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
SHUFPD $0, X1, X5
|
||||
MOVO X5, 160(RARG)
|
||||
MOVO X1, X6
|
||||
SHUFPD $1, X2, X6
|
||||
MOVO X6, 176(RARG)
|
||||
AESKEYGENASSIST(0x80, 2, 0)
|
||||
CALL rk192<>(SB)
|
||||
MOVO X1, 192(RARG)
|
||||
MOVL $12, AX /* number of rounds */
|
||||
RET
|
||||
/*
 * rk192<>: one AES-192 key expansion step (six key words).
 * In: X0 = AESKEYGENASSIST result for X2, X1 and X2 = current
 * key words. Out: X1 and X2 updated. Overwrites X0 and X4.
 * X1 is XORed with copies of itself shifted left by 4, 8 and
 * 12 bytes and with dword 1 of X0 broadcast; X2 is then XORed
 * with itself shifted left by 4 bytes and with dword 3 of the
 * new X1 broadcast.
 */
TEXT rk192<>(SB), 0, $0
|
||||
PSHUFL $0x55, X0, X0
|
||||
MOVOU X1, X4
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PXOR X0, X1
|
||||
PSHUFL $0xff, X1, X0 /* carry the last new word of X1 into X2's update */
|
||||
MOVOU X2, X4
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X2
|
||||
PXOR X0, X2
|
||||
RET
|
||||
|
||||
/*
 * setupEnc256<>: expand a 32-byte key into 15 round keys.
 * Arguments: destination schedule in RARG, key at 8(FP).
 * The two key halves are stored as round keys 0 and 1 (X1, X2).
 * Even round keys are then produced by AESKEYGENASSIST on X2
 * with the next round constant followed by rk256_a<> (updates
 * X1); odd round keys by AESKEYGENASSIST on X1 with immediate 0
 * followed by rk256_b<> (updates X2). Results go to offsets
 * 32..224. Returns 14 in AX.
 */
TEXT setupEnc256<>(SB), 0, $0
|
||||
MOVQ key+8(FP), SI
|
||||
MOVOU (SI), X1
|
||||
MOVOU 16(SI), X2
|
||||
MOVO X1, (RARG)
|
||||
MOVO X2, 16(RARG)
|
||||
AESKEYGENASSIST(0x01, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 32(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 48(RARG)
|
||||
AESKEYGENASSIST(0x02, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 64(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 80(RARG)
|
||||
AESKEYGENASSIST(0x04, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 96(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 112(RARG)
|
||||
AESKEYGENASSIST(0x08, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 128(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 144(RARG)
|
||||
AESKEYGENASSIST(0x10, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 160(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 176(RARG)
|
||||
AESKEYGENASSIST(0x20, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 192(RARG)
|
||||
AESKEYGENASSIST(0x00, 1, 0)
|
||||
CALL rk256_b<>(SB)
|
||||
MOVO X2, 208(RARG)
|
||||
AESKEYGENASSIST(0x40, 2, 0)
|
||||
CALL rk256_a<>(SB)
|
||||
MOVO X1, 224(RARG)
|
||||
MOVL $14, AX /* number of rounds */
|
||||
RET
|
||||
/*
 * rk256_a<>: AES-256 expansion step for even round keys.
 * In: X0 = AESKEYGENASSIST result for X2, X1 = round key two
 * steps back. Out: X1 = next round key. Overwrites X0 and X4.
 * Broadcasts dword 3 of X0, XORs X1 with copies of itself
 * shifted left by 4, 8 and 12 bytes, then XORs in the broadcast.
 */
TEXT rk256_a<>(SB), 0, $0
|
||||
PSHUFL $0xff, X0, X0
|
||||
MOVO X1, X4
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X1
|
||||
PXOR X0, X1
|
||||
RET
|
||||
/*
 * rk256_b<>: AES-256 expansion step for odd round keys.
 * In: X0 = AESKEYGENASSIST result for X1, X2 = round key two
 * steps back. Out: X2 = next round key. Overwrites X0 and X4.
 * Same folding as rk256_a<>, but applied to X2 and using
 * dword 2 of X0 (shuffle 0xaa) as the broadcast word.
 */
TEXT rk256_b<>(SB), 0, $0
|
||||
PSHUFL $0xaa, X0, X0
|
||||
MOVO X2, X4
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X2
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X2
|
||||
PSLLO $4, X4
|
||||
PXOR X4, X2
|
||||
PXOR X0, X2
|
||||
RET
|
|
@ -5,6 +5,7 @@ LIB=/$objtype/lib/libsec.a
|
|||
FILES=\
|
||||
md5block\
|
||||
sha1block\
|
||||
aesni\
|
||||
|
||||
HFILES=/sys/include/libsec.h
|
||||
|
||||
|
|
|
@ -34,99 +34,12 @@
|
|||
typedef uchar u8;
|
||||
typedef ulong u32;
|
||||
|
||||
#define FULL_UNROLL
|
||||
#define const
|
||||
|
||||
static const u32 Td0[256];
|
||||
static const u32 Td1[256];
|
||||
static const u32 Td2[256];
|
||||
static const u32 Td3[256];
|
||||
static const u8 Te4[256];
|
||||
|
||||
static int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
|
||||
int keyBits);
|
||||
static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
|
||||
const uchar cipherKey[], int keyBits);
|
||||
|
||||
void aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
|
||||
void aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
|
||||
|
||||
#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
|
||||
((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
|
||||
#define PUTU32(ct, st) { (ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
|
||||
(ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); }
|
||||
|
||||
void
|
||||
setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
|
||||
{
|
||||
memset(s, 0, sizeof(*s));
|
||||
if(keybytes > AESmaxkey)
|
||||
keybytes = AESmaxkey;
|
||||
memmove(s->key, key, keybytes);
|
||||
s->keybytes = keybytes;
|
||||
s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
|
||||
if(ivec != nil)
|
||||
memmove(s->ivec, ivec, AESbsize);
|
||||
if(keybytes==16 || keybytes==24 || keybytes==32)
|
||||
s->setup = 0xcafebabe;
|
||||
/* else aes_setup was invalid */
|
||||
}
|
||||
|
||||
/*
|
||||
* this function has been changed for plan 9.
|
||||
* Expand the cipher key into the encryption and decryption key schedules.
|
||||
*
|
||||
* @return the number of rounds for the given cipher key size.
|
||||
*/
|
||||
static int
|
||||
aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
|
||||
const uchar cipherKey[], int keyBits)
|
||||
{
|
||||
int Nr, i;
|
||||
|
||||
/* expand the cipher key: */
|
||||
Nr = aes_setupEnc(erk, cipherKey, keyBits);
|
||||
|
||||
/*
|
||||
* invert the order of the round keys and apply the inverse MixColumn
|
||||
* transform to all round keys but the first and the last
|
||||
*/
|
||||
drk[0 ] = erk[4*Nr ];
|
||||
drk[1 ] = erk[4*Nr + 1];
|
||||
drk[2 ] = erk[4*Nr + 2];
|
||||
drk[3 ] = erk[4*Nr + 3];
|
||||
drk[4*Nr ] = erk[0 ];
|
||||
drk[4*Nr + 1] = erk[1 ];
|
||||
drk[4*Nr + 2] = erk[2 ];
|
||||
drk[4*Nr + 3] = erk[3 ];
|
||||
erk += 4 * Nr;
|
||||
for (i = 1; i < Nr; i++) {
|
||||
drk += 4;
|
||||
erk -= 4;
|
||||
drk[0] =
|
||||
Td0[Te4[(erk[0] >> 24) ]] ^
|
||||
Td1[Te4[(erk[0] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[0] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[0] ) & 0xff]];
|
||||
drk[1] =
|
||||
Td0[Te4[(erk[1] >> 24) ]] ^
|
||||
Td1[Te4[(erk[1] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[1] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[1] ) & 0xff]];
|
||||
drk[2] =
|
||||
Td0[Te4[(erk[2] >> 24) ]] ^
|
||||
Td1[Te4[(erk[2] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[2] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[2] ) & 0xff]];
|
||||
drk[3] =
|
||||
Td0[Te4[(erk[3] >> 24) ]] ^
|
||||
Td1[Te4[(erk[3] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[3] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[3] ) & 0xff]];
|
||||
}
|
||||
return Nr;
|
||||
}
|
||||
|
||||
#define FULL_UNROLL
|
||||
|
||||
/*
|
||||
Te0[x] = S [x].[02, 01, 01, 03];
|
||||
|
@ -142,7 +55,7 @@ Td3[x] = Si[x].[09, 0d, 0b, 0e];
|
|||
Td4[x] = Si[x]
|
||||
*/
|
||||
|
||||
static const u32 Te0[256] = {
|
||||
static u32 Te0[256] = {
|
||||
0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
|
||||
0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
|
||||
0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
|
||||
|
@ -208,7 +121,7 @@ static const u32 Te0[256] = {
|
|||
0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
|
||||
0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
|
||||
};
|
||||
static const u32 Te1[256] = {
|
||||
static u32 Te1[256] = {
|
||||
0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
|
||||
0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
|
||||
0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
|
||||
|
@ -274,7 +187,7 @@ static const u32 Te1[256] = {
|
|||
0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
|
||||
0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
|
||||
};
|
||||
static const u32 Te2[256] = {
|
||||
static u32 Te2[256] = {
|
||||
0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
|
||||
0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
|
||||
0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
|
||||
|
@ -340,7 +253,7 @@ static const u32 Te2[256] = {
|
|||
0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
|
||||
0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
|
||||
};
|
||||
static const u32 Te3[256] = {
|
||||
static u32 Te3[256] = {
|
||||
|
||||
0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
|
||||
0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
|
||||
|
@ -407,7 +320,7 @@ static const u32 Te3[256] = {
|
|||
0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
|
||||
0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
|
||||
};
|
||||
static const u8 Te4[256] = {
|
||||
static u8 Te4[256] = {
|
||||
0x63U, 0x7cU, 0x77U, 0x7bU,
|
||||
0xf2U, 0x6bU, 0x6fU, 0xc5U,
|
||||
0x30U, 0x01U, 0x67U, 0x2bU,
|
||||
|
@ -473,7 +386,7 @@ static const u8 Te4[256] = {
|
|||
0x41U, 0x99U, 0x2dU, 0x0fU,
|
||||
0xb0U, 0x54U, 0xbbU, 0x16U,
|
||||
};
|
||||
static const u32 Td0[256] = {
|
||||
static u32 Td0[256] = {
|
||||
0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
|
||||
0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
|
||||
0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
|
||||
|
@ -539,7 +452,7 @@ static const u32 Td0[256] = {
|
|||
0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
|
||||
0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
|
||||
};
|
||||
static const u32 Td1[256] = {
|
||||
static u32 Td1[256] = {
|
||||
0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
|
||||
0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
|
||||
0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
|
||||
|
@ -605,7 +518,7 @@ static const u32 Td1[256] = {
|
|||
0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
|
||||
0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
|
||||
};
|
||||
static const u32 Td2[256] = {
|
||||
static u32 Td2[256] = {
|
||||
0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
|
||||
0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
|
||||
0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
|
||||
|
@ -672,7 +585,7 @@ static const u32 Td2[256] = {
|
|||
0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
|
||||
0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
|
||||
};
|
||||
static const u32 Td3[256] = {
|
||||
static u32 Td3[256] = {
|
||||
0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
|
||||
0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
|
||||
0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
|
||||
|
@ -738,7 +651,7 @@ static const u32 Td3[256] = {
|
|||
0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
|
||||
0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
|
||||
};
|
||||
static const u8 Td4[256] = {
|
||||
static u8 Td4[256] = {
|
||||
0x52U, 0x09U, 0x6aU, 0xd5U,
|
||||
0x30U, 0x36U, 0xa5U, 0x38U,
|
||||
0xbfU, 0x40U, 0xa3U, 0x9eU,
|
||||
|
@ -804,7 +717,7 @@ static const u8 Td4[256] = {
|
|||
0xe1U, 0x69U, 0x14U, 0x63U,
|
||||
0x55U, 0x21U, 0x0cU, 0x7dU,
|
||||
};
|
||||
static const u32 rcon[] = {
|
||||
static u32 rcon[] = {
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000,
|
||||
0x10000000, 0x20000000, 0x40000000, 0x80000000,
|
||||
0x1B000000, 0x36000000,
|
||||
|
@ -817,16 +730,16 @@ static const u32 rcon[] = {
|
|||
* @return the number of rounds for the given cipher key size.
|
||||
*/
|
||||
static int
|
||||
aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
|
||||
setupEnc(ulong rk[/*4*(Nr + 1)*/], uchar key[], int nkey)
|
||||
{
|
||||
int i = 0;
|
||||
u32 temp;
|
||||
|
||||
rk[0] = GETU32(cipherKey );
|
||||
rk[1] = GETU32(cipherKey + 4);
|
||||
rk[2] = GETU32(cipherKey + 8);
|
||||
rk[3] = GETU32(cipherKey + 12);
|
||||
if (keyBits == 128) {
|
||||
rk[0] = GETU32(key );
|
||||
rk[1] = GETU32(key + 4);
|
||||
rk[2] = GETU32(key + 8);
|
||||
rk[3] = GETU32(key + 12);
|
||||
if (nkey == 16) {
|
||||
for (;;) {
|
||||
temp = rk[3];
|
||||
rk[4] = rk[0] ^
|
||||
|
@ -844,9 +757,9 @@ aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
|
|||
rk += 4;
|
||||
}
|
||||
}
|
||||
rk[4] = GETU32(cipherKey + 16);
|
||||
rk[5] = GETU32(cipherKey + 20);
|
||||
if (keyBits == 192) {
|
||||
rk[4] = GETU32(key + 16);
|
||||
rk[5] = GETU32(key + 20);
|
||||
if (nkey == 24) {
|
||||
for (;;) {
|
||||
temp = rk[ 5];
|
||||
rk[ 6] = rk[ 0] ^
|
||||
|
@ -866,9 +779,9 @@ aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
|
|||
rk += 6;
|
||||
}
|
||||
}
|
||||
rk[6] = GETU32(cipherKey + 24);
|
||||
rk[7] = GETU32(cipherKey + 28);
|
||||
if (keyBits == 256) {
|
||||
rk[6] = GETU32(key + 24);
|
||||
rk[7] = GETU32(key + 28);
|
||||
if (nkey == 32) {
|
||||
for (;;) {
|
||||
temp = rk[ 7];
|
||||
rk[ 8] = rk[ 0] ^
|
||||
|
@ -898,10 +811,62 @@ aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Expand the cipher key into the encryption and decryption key schedules.
|
||||
*
|
||||
* @return the number of rounds for the given cipher key size.
|
||||
*/
|
||||
static int
|
||||
AESsetup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */], uchar key[], int nkey)
|
||||
{
|
||||
int Nr, i;
|
||||
|
||||
/* expand the cipher key: */
|
||||
Nr = setupEnc(erk, key, nkey);
|
||||
|
||||
/*
|
||||
* invert the order of the round keys and apply the inverse MixColumn
|
||||
* transform to all round keys but the first and the last
|
||||
*/
|
||||
drk[0 ] = erk[4*Nr ];
|
||||
drk[1 ] = erk[4*Nr + 1];
|
||||
drk[2 ] = erk[4*Nr + 2];
|
||||
drk[3 ] = erk[4*Nr + 3];
|
||||
drk[4*Nr ] = erk[0 ];
|
||||
drk[4*Nr + 1] = erk[1 ];
|
||||
drk[4*Nr + 2] = erk[2 ];
|
||||
drk[4*Nr + 3] = erk[3 ];
|
||||
erk += 4 * Nr;
|
||||
for (i = 1; i < Nr; i++) {
|
||||
drk += 4;
|
||||
erk -= 4;
|
||||
drk[0] =
|
||||
Td0[Te4[(erk[0] >> 24) ]] ^
|
||||
Td1[Te4[(erk[0] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[0] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[0] ) & 0xff]];
|
||||
drk[1] =
|
||||
Td0[Te4[(erk[1] >> 24) ]] ^
|
||||
Td1[Te4[(erk[1] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[1] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[1] ) & 0xff]];
|
||||
drk[2] =
|
||||
Td0[Te4[(erk[2] >> 24) ]] ^
|
||||
Td1[Te4[(erk[2] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[2] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[2] ) & 0xff]];
|
||||
drk[3] =
|
||||
Td0[Te4[(erk[3] >> 24) ]] ^
|
||||
Td1[Te4[(erk[3] >> 16) & 0xff]] ^
|
||||
Td2[Te4[(erk[3] >> 8) & 0xff]] ^
|
||||
Td3[Te4[(erk[3] ) & 0xff]];
|
||||
}
|
||||
return Nr;
|
||||
}
|
||||
|
||||
/* using round keys in rk, perform Nr rounds of encrypting pt into ct */
|
||||
void
|
||||
aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
|
||||
uchar ct[16])
|
||||
static void
|
||||
AESencrypt(ulong rk[/* 4*(Nr + 1) */], int Nr, uchar pt[16], uchar ct[16])
|
||||
{
|
||||
ulong s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
#ifndef FULL_UNROLL
|
||||
|
@ -1082,9 +1047,8 @@ aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
|
|||
PUTU32(ct + 12, s3);
|
||||
}
|
||||
|
||||
void
|
||||
aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
|
||||
uchar pt[16])
|
||||
static void
|
||||
AESdecrypt(ulong rk[/* 4*(Nr + 1) */], int Nr, uchar ct[16], uchar pt[16])
|
||||
{
|
||||
ulong s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
#ifndef FULL_UNROLL
|
||||
|
@ -1265,190 +1229,29 @@ aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
|
|||
PUTU32(pt + 12, s3);
|
||||
}
|
||||
|
||||
#ifdef INTERMEDIATE_VALUE_KAT
|
||||
void (*aes_encrypt)(ulong rk[], int Nr, uchar pt[16], uchar ct[16]) = AESencrypt;
|
||||
void (*aes_decrypt)(ulong rk[], int Nr, uchar ct[16], uchar pt[16]) = AESdecrypt;
|
||||
|
||||
static void
|
||||
aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
|
||||
int rounds)
|
||||
void
|
||||
setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec)
|
||||
{
|
||||
int r;
|
||||
u32 s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
static int (*aes_setup)(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */], uchar key[], int nkey);
|
||||
|
||||
/*
|
||||
* map byte array block to cipher state
|
||||
* and add initial round key:
|
||||
*/
|
||||
s0 = GETU32(block ) ^ rk[0];
|
||||
s1 = GETU32(block + 4) ^ rk[1];
|
||||
s2 = GETU32(block + 8) ^ rk[2];
|
||||
s3 = GETU32(block + 12) ^ rk[3];
|
||||
rk += 4;
|
||||
|
||||
/*
|
||||
* Nr - 1 full rounds:
|
||||
*/
|
||||
for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
|
||||
t0 =
|
||||
Te0[(s0 >> 24) ] ^
|
||||
Te1[(s1 >> 16) & 0xff] ^
|
||||
Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[(s3 ) & 0xff] ^
|
||||
rk[0];
|
||||
t1 =
|
||||
Te0[(s1 >> 24) ] ^
|
||||
Te1[(s2 >> 16) & 0xff] ^
|
||||
Te2[(s3 >> 8) & 0xff] ^
|
||||
Te3[(s0 ) & 0xff] ^
|
||||
rk[1];
|
||||
t2 =
|
||||
Te0[(s2 >> 24) ] ^
|
||||
Te1[(s3 >> 16) & 0xff] ^
|
||||
Te2[(s0 >> 8) & 0xff] ^
|
||||
Te3[(s1 ) & 0xff] ^
|
||||
rk[2];
|
||||
t3 =
|
||||
Te0[(s3 >> 24) ] ^
|
||||
Te1[(s0 >> 16) & 0xff] ^
|
||||
Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[(s2 ) & 0xff] ^
|
||||
rk[3];
|
||||
s0 = t0;
|
||||
s1 = t1;
|
||||
s2 = t2;
|
||||
s3 = t3;
|
||||
rk += 4;
|
||||
if(aes_setup == nil){
|
||||
extern void *aesni_init(void);
|
||||
if((aes_setup = aesni_init()) == nil)
|
||||
aes_setup = AESsetup;
|
||||
}
|
||||
|
||||
/*
|
||||
* apply last round and
|
||||
* map cipher state to byte array block:
|
||||
*/
|
||||
if (rounds == Nr) {
|
||||
t0 =
|
||||
(Te4[(s0 >> 24) ] << 24) ^
|
||||
(Te4[(s1 >> 16) & 0xff] << 16) ^
|
||||
(Te4[(s2 >> 8) & 0xff] << 8) ^
|
||||
(Te4[(s3 ) & 0xff] ) ^
|
||||
rk[0];
|
||||
t1 =
|
||||
(Te4[(s1 >> 24) ] << 24) ^
|
||||
(Te4[(s2 >> 16) & 0xff] << 16) ^
|
||||
(Te4[(s3 >> 8) & 0xff] << 8) ^
|
||||
(Te4[(s0 ) & 0xff] ) ^
|
||||
rk[1];
|
||||
t2 =
|
||||
(Te4[(s2 >> 24) ] << 24) ^
|
||||
(Te4[(s3 >> 16) & 0xff] << 16) ^
|
||||
(Te4[(s0 >> 8) & 0xff] << 8) ^
|
||||
(Te4[(s1 ) & 0xff] ) ^
|
||||
rk[2];
|
||||
t3 =
|
||||
(Te4[(s3 >> 24) ] << 24) ^
|
||||
(Te4[(s0 >> 16) & 0xff] << 16) ^
|
||||
(Te4[(s1 >> 8) & 0xff] << 8) ^
|
||||
(Te4[(s2 ) & 0xff] ) ^
|
||||
rk[3];
|
||||
s0 = t0;
|
||||
s1 = t1;
|
||||
s2 = t2;
|
||||
s3 = t3;
|
||||
memset(s, 0, sizeof(*s));
|
||||
if(nkey > AESmaxkey)
|
||||
nkey = AESmaxkey;
|
||||
memmove(s->key, key, nkey);
|
||||
s->keybytes = nkey;
|
||||
s->ekey = s->storage+16 - (s->storage - (uchar*)0 & 15);
|
||||
s->dkey = (uchar*)s->ekey + 16*(AESmaxrounds+1);
|
||||
s->rounds = (*aes_setup)(s->ekey, s->dkey, s->key, nkey);
|
||||
if(ivec != nil)
|
||||
memmove(s->ivec, ivec, AESbsize);
|
||||
if(s->rounds != 0)
|
||||
s->setup = 0xcafebabe;
|
||||
}
|
||||
|
||||
PUTU32(block , s0);
|
||||
PUTU32(block + 4, s1);
|
||||
PUTU32(block + 8, s2);
|
||||
PUTU32(block + 12, s3);
|
||||
}
|
||||
|
||||
static void
|
||||
aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
|
||||
int rounds)
|
||||
{
|
||||
int r;
|
||||
u32 s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
|
||||
/*
|
||||
* map byte array block to cipher state
|
||||
* and add initial round key:
|
||||
*/
|
||||
s0 = GETU32(block ) ^ rk[0];
|
||||
s1 = GETU32(block + 4) ^ rk[1];
|
||||
s2 = GETU32(block + 8) ^ rk[2];
|
||||
s3 = GETU32(block + 12) ^ rk[3];
|
||||
rk += 4;
|
||||
|
||||
/*
|
||||
* Nr - 1 full rounds:
|
||||
*/
|
||||
for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
|
||||
t0 =
|
||||
Td0[(s0 >> 24) ] ^
|
||||
Td1[(s3 >> 16) & 0xff] ^
|
||||
Td2[(s2 >> 8) & 0xff] ^
|
||||
Td3[(s1 ) & 0xff] ^
|
||||
rk[0];
|
||||
t1 =
|
||||
Td0[(s1 >> 24) ] ^
|
||||
Td1[(s0 >> 16) & 0xff] ^
|
||||
Td2[(s3 >> 8) & 0xff] ^
|
||||
Td3[(s2 ) & 0xff] ^
|
||||
rk[1];
|
||||
t2 =
|
||||
Td0[(s2 >> 24) ] ^
|
||||
Td1[(s1 >> 16) & 0xff] ^
|
||||
Td2[(s0 >> 8) & 0xff] ^
|
||||
Td3[(s3 ) & 0xff] ^
|
||||
rk[2];
|
||||
t3 =
|
||||
Td0[(s3 >> 24) ] ^
|
||||
Td1[(s2 >> 16) & 0xff] ^
|
||||
Td2[(s1 >> 8) & 0xff] ^
|
||||
Td3[(s0 ) & 0xff] ^
|
||||
rk[3];
|
||||
|
||||
s0 = t0;
|
||||
s1 = t1;
|
||||
s2 = t2;
|
||||
s3 = t3;
|
||||
rk += 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* complete the last round and
|
||||
* map cipher state to byte array block:
|
||||
*/
|
||||
t0 =
|
||||
(Td4[(s0 >> 24) ] << 24) ^
|
||||
(Td4[(s3 >> 16) & 0xff] << 16) ^
|
||||
(Td4[(s2 >> 8) & 0xff] << 8) ^
|
||||
(Td4[(s1 ) & 0xff] );
|
||||
t1 =
|
||||
(Td4[(s1 >> 24) ] << 24) ^
|
||||
(Td4[(s0 >> 16) & 0xff] << 16) ^
|
||||
(Td4[(s3 >> 8) & 0xff] << 8) ^
|
||||
(Td4[(s2 ) & 0xff] );
|
||||
t2 =
|
||||
(Td4[(s2 >> 24) ] << 24) ^
|
||||
(Td4[(s1 >> 16) & 0xff] << 16) ^
|
||||
(Td4[(s0 >> 8) & 0xff] << 8) ^
|
||||
(Td4[(s3 ) & 0xff] );
|
||||
t3 =
|
||||
(Td4[(s3 >> 24) ] << 24) ^
|
||||
(Td4[(s2 >> 16) & 0xff] << 16) ^
|
||||
(Td4[(s1 >> 8) & 0xff] << 8) ^
|
||||
(Td4[(s0 ) & 0xff] );
|
||||
|
||||
if (rounds == Nr) {
|
||||
t0 ^= rk[0];
|
||||
t1 ^= rk[1];
|
||||
t2 ^= rk[2];
|
||||
t3 ^= rk[3];
|
||||
}
|
||||
|
||||
PUTU32(block , t0);
|
||||
PUTU32(block + 4, t1);
|
||||
PUTU32(block + 8, t2);
|
||||
PUTU32(block + 12, t3);
|
||||
}
|
||||
|
||||
#endif /* INTERMEDIATE_VALUE_KAT */
|
||||
|
|
|
@ -150,9 +150,10 @@ setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)
|
|||
{
|
||||
setupAESstate(s, key, keylen, nil);
|
||||
|
||||
memset(s->mackey, 0, AESbsize);
|
||||
aes_encrypt(s->ekey, s->rounds, s->mackey, s->mackey);
|
||||
load128(s->mackey, s->H);
|
||||
memset(s->ivec, 0, AESbsize);
|
||||
aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
|
||||
load128(s->ivec, s->H);
|
||||
memset(s->ivec, 0, AESbsize);
|
||||
prepareM(s->H, s->M);
|
||||
|
||||
if(iv != nil && ivlen > 0)
|
||||
|
|
5
sys/src/libsec/port/aesni.c
Normal file
5
sys/src/libsec/port/aesni.c
Normal file
|
@ -0,0 +1,5 @@
|
|||
/*
 * Portable stand-in for the AES-NI probe: always reports that no
 * accelerated setup routine exists by returning a null pointer.
 * setupAESstate() treats a nil result as the cue to fall back to the
 * C key setup (AESsetup).
 */
void*
aesni_init(void)
{
	void *setup;

	setup = 0;	/* nothing to offer on this port */
	return setup;
}
|
|
@ -3,7 +3,7 @@
|
|||
LIB=/$objtype/lib/libsec.a
|
||||
|
||||
CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
|
||||
aes.c aesCBC.c aesCFB.c aesOFB.c aesXCBmac.c aes_gcm.c \
|
||||
aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
|
||||
blowfish.c \
|
||||
hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
|
||||
sha2_64.c sha2_128.c sha2block64.c sha2block128.c\
|
||||
|
|
Loading…
Reference in a new issue