summaryrefslogtreecommitdiff
path: root/sys/src/libsec/amd64
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@felloff.net>2017-11-12 23:15:15 +0100
committercinap_lenrek <cinap_lenrek@felloff.net>2017-11-12 23:15:15 +0100
commit3356e0e731bb8e0f4c82caebe358fae2c8fc9113 (patch)
treea92fcc0632401e8a1701f6b386c180ec6f7317c9 /sys/src/libsec/amd64
parent4f27f6a04f8c8709e20767b50bd7c2a22ab29340 (diff)
libsec: AES-NI support for amd64
Add assembler versions for aes_encrypt/aes_decrypt and the key setup using AES-NI instruction set. This makes aes_encrypt and aes_decrypt into function pointers which get initialized by the first call to setupAESstate(). Note that the expanded round key words are *NOT* stored in big endian order as with the portable implementation. For that reason the AESstate.ekey and AESstate.dkey fields have been changed to void* forcing an error when someone is accessing the roundkey words. One offender was aesXCBmac, which doesn't appear to be used and the code looks horrible so it has been deleted. The AES-NI implementation is for amd64 only as it requires the kernel to save/restore the FPU state across syscalls and pagefaults.
Diffstat (limited to 'sys/src/libsec/amd64')
-rw-r--r--sys/src/libsec/amd64/aesni.s408
-rw-r--r--sys/src/libsec/amd64/mkfile1
2 files changed, 409 insertions, 0 deletions
diff --git a/sys/src/libsec/amd64/aesni.s b/sys/src/libsec/amd64/aesni.s
new file mode 100644
index 000000000..37275cdbf
--- /dev/null
+++ b/sys/src/libsec/amd64/aesni.s
@@ -0,0 +1,408 @@
+/*
+ * The assembler predates the AES-NI instructions, so they are
+ * emitted as raw bytes.  AESOP encodes the register-register form
+ * 66 0F 38 <o> /r with ModRM = 11 | r2<<3 | r1: r2 is the x86 reg
+ * field (destination XMM) and r1 the r/m field (source XMM).
+ */
+#define AESOP(o,r1,r2) \
+ BYTE $0x66; \
+ BYTE $0x0F; \
+ BYTE $0x38; \
+ BYTE $(o); \
+ BYTE $(0xC0 | r2<<3 | r1)
+
+/* usage: AESx(src, dst) -> Xdst = AESx(Xdst state, Xsrc round key) */
+#define AESIMC(r1,r2) AESOP(0xDB,r1,r2)
+#define AESENC(r1,r2) AESOP(0xDC,r1,r2)
+#define AESENCLAST(r1,r2) AESOP(0xDD,r1,r2)
+#define AESDEC(r1,r2) AESOP(0xDE,r1,r2)
+#define AESDECLAST(r1,r2) AESOP(0xDF,r1,r2)
+
+/*
+ * AESKEYGENASSIST(i, src, dst): 66 0F 3A DF /r ib,
+ * Xdst = key-expansion helper of Xsrc with round constant i.
+ */
+#define AESKEYGENASSIST(i,r1,r2) \
+ BYTE $0x66; \
+ BYTE $0x0F; \
+ BYTE $0x3A; \
+ BYTE $0xDF; \
+ BYTE $(0xC0 | r2<<3 | r1); \
+ BYTE $(i)
+
+/*
+ * aesni_init - probe for AES-NI and install the accelerated routines.
+ *
+ * Checks CPUID leaf 1, ECX bit 25 (the AES-NI feature flag).  When
+ * present, overwrites the aes_encrypt/aes_decrypt function pointers
+ * with the AES-NI implementations below and returns (in AX) a
+ * pointer to AESsetup for the caller to use for key expansion;
+ * returns 0 when AES-NI is not available.
+ */
+TEXT aesni_init(SB), 0, $0
+ MOVL $1, AX
+ CPUID
+ XORL AX, AX /* preload nil return value */
+ ANDL $(1<<25), CX /* CPUID.1:ECX bit 25 = AES-NI */
+ JZ _ret
+
+ /* override aes function pointers */
+ MOVQ $AESencrypt<>(SB), AX
+ MOVQ AX, aes_encrypt(SB)
+ MOVQ $AESdecrypt<>(SB), AX
+ MOVQ AX, aes_decrypt(SB)
+
+ /* return setup function pointer */
+ MOVQ $AESsetup<>(SB), AX
+_ret:
+ RET
+
+/*
+ * AESencrypt(ekey, Nr, pt, ct) - encrypt one 16-byte block.
+ *
+ * In:  RARG = expanded encryption key schedule (Nr+1 round keys)
+ *      Nr+8(FP) = number of rounds (10, 12 or 14)
+ *      pt+16(FP) = plaintext block, ct+24(FP) = ciphertext output.
+ * X0 carries the cipher state; AESENC(n, 0) performs one round of
+ * X0 with the round key in Xn.  The Nr=14/12 cases run their extra
+ * rounds first, then fall through into the common final 10 rounds.
+ */
+TEXT AESencrypt<>(SB), 0, $0
+ MOVL Nr+8(FP), CX
+ MOVQ pt+16(FP), SI
+ MOVQ ct+24(FP), DI
+ MOVO (RARG), X0 /* rk[0] */
+ MOVOU (SI), X7 /* plaintext; may be unaligned */
+ ADDQ $16, RARG
+ PXOR X7, X0 /* initial AddRoundKey */
+ CMPL CX, $12
+ JLT erounds10
+ JEQ erounds12
+erounds14:
+ /* AES-256: two extra rounds, then fall through */
+ MOVO 0(RARG), X1
+ MOVO 16(RARG), X2
+ ADDQ $32, RARG
+ AESENC(1, 0)
+ AESENC(2, 0)
+erounds12:
+ /* AES-192: two extra rounds, then fall through */
+ MOVO 0(RARG), X3
+ MOVO 16(RARG), X4
+ ADDQ $32, RARG
+ AESENC(3, 0)
+ AESENC(4, 0)
+erounds10:
+ /* final 10 rounds; preload keys, refilling X1-X3 once consumed */
+ MOVO 0(RARG), X1
+ MOVO 16(RARG), X2
+ MOVO 32(RARG), X3
+ MOVO 48(RARG), X4
+ MOVO 64(RARG), X5
+ MOVO 80(RARG), X6
+ MOVO 96(RARG), X7
+ AESENC(1, 0)
+ MOVO 112(RARG), X1
+ AESENC(2, 0)
+ MOVO 128(RARG), X2
+ AESENC(3, 0)
+ MOVO 144(RARG), X3
+ AESENC(4, 0)
+ AESENC(5, 0)
+ AESENC(6, 0)
+ AESENC(7, 0)
+
+ AESENC(1, 0)
+ AESENC(2, 0)
+ AESENCLAST(3, 0) /* last round omits MixColumns */
+ MOVOU X0, (DI) /* store ciphertext */
+ RET
+
+/*
+ * AESdecrypt(dkey, Nr, ct, pt) - decrypt one 16-byte block.
+ *
+ * Mirror image of AESencrypt: same round structure, but using
+ * AESDEC/AESDECLAST with the decryption key schedule (built by
+ * AESsetup with InvMixColumns applied to the middle round keys,
+ * as the "equivalent inverse cipher" requires).
+ * In:  RARG = decryption key schedule, Nr+8(FP) = rounds,
+ *      ct+16(FP) = ciphertext block, pt+24(FP) = plaintext output.
+ */
+TEXT AESdecrypt<>(SB), 0, $0
+ MOVL Nr+8(FP), CX
+ MOVQ ct+16(FP), SI
+ MOVQ pt+24(FP), DI
+ MOVO (RARG), X0 /* dk[0] */
+ MOVOU (SI), X7 /* ciphertext; may be unaligned */
+ ADDQ $16, RARG
+ PXOR X7, X0 /* initial AddRoundKey */
+ CMPL CX, $12
+ JLT drounds10
+ JEQ drounds12
+drounds14:
+ /* AES-256: two extra rounds, then fall through */
+ MOVO 0(RARG), X1
+ MOVO 16(RARG), X2
+ ADDQ $32, RARG
+ AESDEC(1, 0)
+ AESDEC(2, 0)
+drounds12:
+ /* AES-192: two extra rounds, then fall through */
+ MOVO 0(RARG), X3
+ MOVO 16(RARG), X4
+ ADDQ $32, RARG
+ AESDEC(3, 0)
+ AESDEC(4, 0)
+drounds10:
+ /* final 10 rounds; preload keys, refilling X1-X3 once consumed */
+ MOVO 0(RARG), X1
+ MOVO 16(RARG), X2
+ MOVO 32(RARG), X3
+ MOVO 48(RARG), X4
+ MOVO 64(RARG), X5
+ MOVO 80(RARG), X6
+ MOVO 96(RARG), X7
+ AESDEC(1, 0)
+ MOVO 112(RARG), X1
+ AESDEC(2, 0)
+ MOVO 128(RARG), X2
+ AESDEC(3, 0)
+ MOVO 144(RARG), X3
+ AESDEC(4, 0)
+ AESDEC(5, 0)
+ AESDEC(6, 0)
+ AESDEC(7, 0)
+
+ AESDEC(1, 0)
+ AESDEC(2, 0)
+ AESDECLAST(3, 0) /* last round omits InvMixColumns */
+ MOVOU X0, (DI) /* store plaintext */
+ RET
+
+/*
+ * AESsetup(ekey, dkey, key, nkey) - expand key schedules.
+ *
+ * Expands the user key into the encryption schedule at ekey via the
+ * setupEnc* helpers, then derives the decryption schedule at dkey:
+ *   dkey[0]  = ekey[Nr]
+ *   dkey[i]  = InvMixColumns(ekey[Nr-i])   for 0 < i < Nr
+ *   dkey[Nr] = ekey[0]
+ * Returns Nr (10/12/14) in AX, or 0 for an unsupported key length.
+ */
+TEXT AESsetup<>(SB), 0, $16
+ MOVQ RARG, erk+0(FP) /* save ekey; reloaded at dsetup */
+ MOVQ key+16(FP), DX
+ MOVL nkey+24(FP), BX
+ MOVQ DX, 8(SP) /* becomes key+8(FP) in the setupEnc* callees */
+ CMPL BX, $32
+ JEQ esetup256
+ CMPL BX, $24
+ JEQ esetup192
+ CMPL BX, $16
+ JEQ esetup128
+ XORL AX, AX /* bad key length: return 0 */
+ RET
+esetup256:
+ CALL setupEnc256<>(SB)
+ JMP dsetup
+esetup192:
+ CALL setupEnc192<>(SB)
+ JMP dsetup
+esetup128:
+ CALL setupEnc128<>(SB)
+dsetup:
+ /* AX = Nr from setupEnc*; now build the decryption schedule */
+ MOVQ erk+0(FP), SI
+ MOVQ drk+8(FP), DI
+
+ MOVL AX, BX
+ SHLL $4, BX /* BX = Nr*16 bytes */
+ ADDQ BX, SI /* SI = &ekey[Nr], the last round key */
+
+ MOVO (SI), X0
+ MOVO X0, (DI) /* dkey[0] = ekey[Nr], no InvMixColumns */
+
+ /* dkey[1..9] = InvMixColumns(ekey[Nr-1..Nr-9]), 4 keys at a time */
+ MOVO -16(SI), X1
+ MOVO -32(SI), X2
+ MOVO -48(SI), X3
+ MOVO -64(SI), X4
+ AESIMC(1, 1)
+ AESIMC(2, 2)
+ AESIMC(3, 3)
+ AESIMC(4, 4)
+ MOVO X1, 16(DI)
+ MOVO X2, 32(DI)
+ MOVO X3, 48(DI)
+ MOVO X4, 64(DI)
+
+ MOVO -80(SI), X1
+ MOVO -96(SI), X2
+ MOVO -112(SI), X3
+ MOVO -128(SI), X4
+ AESIMC(1, 1)
+ AESIMC(2, 2)
+ AESIMC(3, 3)
+ AESIMC(4, 4)
+ MOVO X1, 80(DI)
+ MOVO X2, 96(DI)
+ MOVO X3, 112(DI)
+ MOVO X4, 128(DI)
+
+ MOVO -144(SI), X1
+ AESIMC(1, 1)
+ MOVO X1, 144(DI)
+
+ CMPL AX, $10
+ JEQ dsetupend /* AES-128: all 9 middle keys done */
+
+ MOVO -160(SI), X1
+ MOVO -176(SI), X2
+ AESIMC(1, 1)
+ AESIMC(2, 2)
+ MOVO X1, 160(DI)
+ MOVO X2, 176(DI)
+
+ CMPL AX, $12
+ JEQ dsetupend /* AES-192: all 11 middle keys done */
+
+ MOVO -192(SI), X1
+ MOVO -208(SI), X2
+ AESIMC(1, 1)
+ AESIMC(2, 2)
+ MOVO X1, 192(DI)
+ MOVO X2, 208(DI)
+dsetupend:
+ SUBQ BX, SI /* back to &ekey[0] */
+ ADDQ BX, DI /* DI = &dkey[Nr] */
+ MOVO (SI), X0
+ MOVO X0, (DI) /* dkey[Nr] = ekey[0] */
+ RET
+
+/*
+ * setupEnc128 - expand a 16-byte key into 11 round keys at (RARG).
+ *
+ * In:  RARG = ekey output, key+8(FP) = user key (stored by AESsetup).
+ * Out: AX = 10 (rounds).
+ * Each step: AESKEYGENASSIST of the previous round key (in X1) with
+ * the next round constant into X0, then rk128 folds it into X1.
+ */
+TEXT setupEnc128<>(SB), 0, $0
+ MOVQ key+8(FP), SI
+ MOVOU (SI), X1
+ MOVO X1, (RARG) /* rk[0] = user key */
+ AESKEYGENASSIST(0x01, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 16(RARG)
+ AESKEYGENASSIST(0x02, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 32(RARG)
+ AESKEYGENASSIST(0x04, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 48(RARG)
+ AESKEYGENASSIST(0x08, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 64(RARG)
+ AESKEYGENASSIST(0x10, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 80(RARG)
+ AESKEYGENASSIST(0x20, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 96(RARG)
+ AESKEYGENASSIST(0x40, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 112(RARG)
+ AESKEYGENASSIST(0x80, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 128(RARG)
+ AESKEYGENASSIST(0x1b, 1, 0) /* rcon wraps: x^8 mod poly */
+ CALL rk128<>(SB)
+ MOVO X1, 144(RARG)
+ AESKEYGENASSIST(0x36, 1, 0)
+ CALL rk128<>(SB)
+ MOVO X1, 160(RARG)
+ MOVL $10, AX
+ RET
+/*
+ * rk128 - one AES-128 key-schedule step.
+ * In:  X1 = previous round key, X0 = AESKEYGENASSIST(rcon, X1).
+ * Out: X1 = next round key.  Clobbers X0, X2.
+ * PSHUFL $0xff broadcasts word 3 of the assist result (the
+ * RotWord/SubWord/rcon term); PSLLO $4 shifts the octword left by
+ * 4 bytes, so the three shift/xor pairs give each word the running
+ * xor of all lower words of the previous key.
+ */
+TEXT rk128<>(SB), 0, $0
+ PSHUFL $0xff, X0, X0
+ MOVO X1, X2
+ PSLLO $4, X2
+ PXOR X2, X1
+ PSLLO $4, X2
+ PXOR X2, X1
+ PSLLO $4, X2
+ PXOR X2, X1
+ PXOR X0, X1
+ RET
+
+/*
+ * setupEnc192 - expand a 24-byte key into 13 round keys at (RARG).
+ *
+ * In:  RARG = ekey output, key+8(FP) = user key.
+ * Out: AX = 12 (rounds).
+ * The 192-bit schedule advances in 24-byte strides (X1 = low 16
+ * bytes, X2 = high 8 bytes of each stride), so SHUFPD is used to
+ * repack the qword halves into aligned 16-byte round-key slots.
+ */
+TEXT setupEnc192<>(SB), 0, $0
+ MOVQ key+8(FP), SI
+ MOVOU (SI), X1
+ MOVOU 16(SI), X2
+ MOVO X1, (RARG)
+ MOVO X2, X5 /* keep old X2 half for repacking */
+ AESKEYGENASSIST(0x01, 2, 0)
+ CALL rk192<>(SB)
+ SHUFPD $0, X1, X5 /* X5 = {old X2 low, new X1 low} */
+ MOVO X5, 16(RARG)
+ MOVO X1, X6
+ SHUFPD $1, X2, X6 /* X6 = {new X1 high, new X2 low} */
+ MOVO X6, 32(RARG)
+ AESKEYGENASSIST(0x02, 2, 0)
+ CALL rk192<>(SB)
+ MOVO X1, 48(RARG) /* stride lands on a slot boundary */
+ MOVO X2, X5
+ AESKEYGENASSIST(0x04, 2, 0)
+ CALL rk192<>(SB)
+ SHUFPD $0, X1, X5
+ MOVO X5, 64(RARG)
+ MOVO X1, X6
+ SHUFPD $1, X2, X6
+ MOVO X6, 80(RARG)
+ AESKEYGENASSIST(0x08, 2, 0)
+ CALL rk192<>(SB)
+ MOVO X1, 96(RARG)
+ MOVO X2, X5
+ AESKEYGENASSIST(0x10, 2, 0)
+ CALL rk192<>(SB)
+ SHUFPD $0, X1, X5
+ MOVO X5, 112(RARG)
+ MOVO X1, X6
+ SHUFPD $1, X2, X6
+ MOVO X6, 128(RARG)
+ AESKEYGENASSIST(0x20, 2, 0)
+ CALL rk192<>(SB)
+ MOVO X1, 144(RARG)
+ MOVO X2, X5
+ AESKEYGENASSIST(0x40, 2, 0)
+ CALL rk192<>(SB)
+ SHUFPD $0, X1, X5
+ MOVO X5, 160(RARG)
+ MOVO X1, X6
+ SHUFPD $1, X2, X6
+ MOVO X6, 176(RARG)
+ AESKEYGENASSIST(0x80, 2, 0)
+ CALL rk192<>(SB)
+ MOVO X1, 192(RARG)
+ MOVL $12, AX
+ RET
+/*
+ * rk192 - one AES-192 key-schedule step (24-byte stride).
+ * In:  X1 = low 16 bytes, X2 = high 8 bytes of the previous stride,
+ *      X0 = AESKEYGENASSIST(rcon, X2).
+ * Out: X1, X2 updated to the next stride.  Clobbers X0, X4.
+ * PSHUFL $0x55 broadcasts word 1 of the assist (the rcon-transformed
+ * word for this key size); X1 is folded as in rk128, then word 3 of
+ * the new X1 is broadcast to extend the two words held in X2.
+ */
+TEXT rk192<>(SB), 0, $0
+ PSHUFL $0x55, X0, X0
+ MOVOU X1, X4
+ PSLLO $4, X4
+ PXOR X4, X1
+ PSLLO $4, X4
+ PXOR X4, X1
+ PSLLO $4, X4
+ PXOR X4, X1
+ PXOR X0, X1
+ PSHUFL $0xff, X1, X0
+ MOVOU X2, X4
+ PSLLO $4, X4
+ PXOR X4, X2
+ PXOR X0, X2
+ RET
+
+/*
+ * setupEnc256 - expand a 32-byte key into 15 round keys at (RARG).
+ *
+ * In:  RARG = ekey output, key+8(FP) = user key (two 16-byte halves
+ *      in X1/X2).
+ * Out: AX = 14 (rounds).
+ * Even round keys come from rk256_a (assist of X2 with rcon),
+ * odd ones from rk256_b (assist of X1 with rcon 0, which supplies
+ * the SubWord-only term the 256-bit schedule needs).
+ */
+TEXT setupEnc256<>(SB), 0, $0
+ MOVQ key+8(FP), SI
+ MOVOU (SI), X1
+ MOVOU 16(SI), X2
+ MOVO X1, (RARG)
+ MOVO X2, 16(RARG)
+ AESKEYGENASSIST(0x01, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 32(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 48(RARG)
+ AESKEYGENASSIST(0x02, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 64(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 80(RARG)
+ AESKEYGENASSIST(0x04, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 96(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 112(RARG)
+ AESKEYGENASSIST(0x08, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 128(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 144(RARG)
+ AESKEYGENASSIST(0x10, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 160(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 176(RARG)
+ AESKEYGENASSIST(0x20, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 192(RARG)
+ AESKEYGENASSIST(0x00, 1, 0)
+ CALL rk256_b<>(SB)
+ MOVO X2, 208(RARG)
+ AESKEYGENASSIST(0x40, 2, 0)
+ CALL rk256_a<>(SB)
+ MOVO X1, 224(RARG)
+ MOVL $14, AX
+ RET
+/*
+ * rk256_a - AES-256 even key-schedule step.
+ * In:  X1 = round key i-2, X0 = AESKEYGENASSIST(rcon, X2=key i-1).
+ * Out: X1 = round key i.  Clobbers X0, X4.
+ * Same fold as rk128: broadcast word 3 of the assist, xor in the
+ * running word-wise prefix of the old key.
+ */
+TEXT rk256_a<>(SB), 0, $0
+ PSHUFL $0xff, X0, X0
+ MOVO X1, X4
+ PSLLO $4, X4
+ PXOR X4, X1
+ PSLLO $4, X4
+ PXOR X4, X1
+ PSLLO $4, X4
+ PXOR X4, X1
+ PXOR X0, X1
+ RET
+/*
+ * rk256_b - AES-256 odd key-schedule step.
+ * In:  X2 = round key i-2, X0 = AESKEYGENASSIST(0, X1=key i-1).
+ * Out: X2 = round key i.  Clobbers X0, X4.
+ * PSHUFL $0xaa broadcasts word 2 of the assist, i.e. SubWord of the
+ * previous key's last word without rotation or rcon, as the 256-bit
+ * schedule requires on these steps.
+ */
+TEXT rk256_b<>(SB), 0, $0
+ PSHUFL $0xaa, X0, X0
+ MOVO X2, X4
+ PSLLO $4, X4
+ PXOR X4, X2
+ PSLLO $4, X4
+ PXOR X4, X2
+ PSLLO $4, X4
+ PXOR X4, X2
+ PXOR X0, X2
+ RET
diff --git a/sys/src/libsec/amd64/mkfile b/sys/src/libsec/amd64/mkfile
index f7948cad9..990d35aa4 100644
--- a/sys/src/libsec/amd64/mkfile
+++ b/sys/src/libsec/amd64/mkfile
@@ -5,6 +5,7 @@ LIB=/$objtype/lib/libsec.a
FILES=\
md5block\
sha1block\
+ aesni\
HFILES=/sys/include/libsec.h