diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-02-01 10:31:41 +0100 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-02-01 10:31:41 +0100 |
commit | ed9e9f98e9cc502c72b27c68612e9e187ec11e10 (patch) | |
tree | 42901d0156503dadce4bf1f0b60e9ef850c3c5e0 /sys/src/libsec | |
parent | d4fb753c9c90e0ca745a1b3708ad3ec4ca523e71 (diff) |
libc and ape support for amd64
Diffstat (limited to 'sys/src/libsec')
-rw-r--r-- | sys/src/libsec/amd64/md5block.s | 212 | ||||
-rw-r--r-- | sys/src/libsec/amd64/mkfile | 19 | ||||
-rw-r--r-- | sys/src/libsec/amd64/sha1block.s | 197 |
3 files changed, 428 insertions, 0 deletions
diff --git a/sys/src/libsec/amd64/md5block.s b/sys/src/libsec/amd64/md5block.s new file mode 100644 index 000000000..4e0d00883 --- /dev/null +++ b/sys/src/libsec/amd64/md5block.s @@ -0,0 +1,212 @@ +/* + * rfc1321 requires that I include this. The code is new. The constants + * all come from the rfc (hence the copyright). We trade a table for the + * macros in rfc. The total size is a lot less. -- presotto + * + * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All + * rights reserved. + * + * License to copy and use this software is granted provided that it + * is identified as the "RSA Data Security, Inc. MD5 Message-Digest + * Algorithm" in all material mentioning or referencing this software + * or this function. + * + * License is also granted to make and use derivative works provided + * that such works are identified as "derived from the RSA Data + * Security, Inc. MD5 Message-Digest Algorithm" in all material + * mentioning or referencing the derived work. + * + * RSA Data Security, Inc. makes no representations concerning either + * the merchantability of this software or the suitability of this + * software forany particular purpose. It is provided "as is" + * without express or implied warranty of any kind. + * These notices must be retained in any copies of any part of this + * documentation and/or software. + */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + +/* + * SI is data + * a += FN(B,C,D); + * a += x[sh] + t[sh]; + * a = (a << S11) | (a >> (32 - S11)); + * a += b; + */ + +#define BODY1(off,V,FN,SH,A,B,C,D)\ + FN(B,C,D)\ + LEAL V(A)(DI*1),A;\ + ADDL (off)(BP),A;\ + ROLL $SH,A;\ + ADDL B,A;\ + +#define BODY(off,V,FN,SH,A,B,C,D)\ + FN(B,C,D)\ + LEAL V(A)(DI*1),A;\ + ADDL (off)(BP),A;\ + ROLL $SH,A;\ + ADDL B,A;\ + +/* + * fn1 = ((c ^ d) & b) ^ d + */ +#define FN1(B,C,D)\ + MOVL C,DI;\ + XORL D,DI;\ + ANDL B,DI;\ + XORL D,DI;\ + +/* + * fn2 = ((b ^ c) & d) ^ c; + */ +#define FN2(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + ANDL D,DI;\ + XORL C,DI;\ + +/* + * fn3 = b ^ c ^ d; + */ +#define FN3(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL D,DI;\ + +/* + * fn4 = c ^ (b | ~d); + */ +#define FN4(B,C,D)\ + MOVL D,DI;\ + XORL $-1,DI;\ + ORL B,DI;\ + XORL C,DI;\ + +#define LEN 8 +#define STATE 16 + +TEXT _md5block+0(SB), $0 + + MOVQ RARG,R8 + MOVLQZX len+LEN(FP),BX + ADDQ BX,R8 + +mainloop: + MOVQ state+STATE(FP),SI + MOVL (SI),AX + MOVL 4(SI),BX + MOVL 8(SI),CX + MOVL 12(SI),DX + + BODY1( 0*4,0xd76aa478,FN1,S11,AX,BX,CX,DX) + BODY1( 1*4,0xe8c7b756,FN1,S12,DX,AX,BX,CX) + BODY1( 2*4,0x242070db,FN1,S13,CX,DX,AX,BX) + BODY1( 3*4,0xc1bdceee,FN1,S14,BX,CX,DX,AX) + + BODY1( 4*4,0xf57c0faf,FN1,S11,AX,BX,CX,DX) + BODY1( 5*4,0x4787c62a,FN1,S12,DX,AX,BX,CX) + BODY1( 6*4,0xa8304613,FN1,S13,CX,DX,AX,BX) + BODY1( 7*4,0xfd469501,FN1,S14,BX,CX,DX,AX) + + BODY1( 8*4,0x698098d8,FN1,S11,AX,BX,CX,DX) + BODY1( 9*4,0x8b44f7af,FN1,S12,DX,AX,BX,CX) + BODY1(10*4,0xffff5bb1,FN1,S13,CX,DX,AX,BX) + BODY1(11*4,0x895cd7be,FN1,S14,BX,CX,DX,AX) + + BODY1(12*4,0x6b901122,FN1,S11,AX,BX,CX,DX) + BODY1(13*4,0xfd987193,FN1,S12,DX,AX,BX,CX) + BODY1(14*4,0xa679438e,FN1,S13,CX,DX,AX,BX) + BODY1(15*4,0x49b40821,FN1,S14,BX,CX,DX,AX) + + + BODY( 1*4,0xf61e2562,FN2,S21,AX,BX,CX,DX) + BODY( 6*4,0xc040b340,FN2,S22,DX,AX,BX,CX) + BODY(11*4,0x265e5a51,FN2,S23,CX,DX,AX,BX) + BODY( 0*4,0xe9b6c7aa,FN2,S24,BX,CX,DX,AX) + + BODY( 5*4,0xd62f105d,FN2,S21,AX,BX,CX,DX) + BODY(10*4,0x02441453,FN2,S22,DX,AX,BX,CX) + BODY(15*4,0xd8a1e681,FN2,S23,CX,DX,AX,BX) + BODY( 4*4,0xe7d3fbc8,FN2,S24,BX,CX,DX,AX) + + BODY( 9*4,0x21e1cde6,FN2,S21,AX,BX,CX,DX) + BODY(14*4,0xc33707d6,FN2,S22,DX,AX,BX,CX) + BODY( 3*4,0xf4d50d87,FN2,S23,CX,DX,AX,BX) + BODY( 8*4,0x455a14ed,FN2,S24,BX,CX,DX,AX) + + BODY(13*4,0xa9e3e905,FN2,S21,AX,BX,CX,DX) + BODY( 2*4,0xfcefa3f8,FN2,S22,DX,AX,BX,CX) + BODY( 7*4,0x676f02d9,FN2,S23,CX,DX,AX,BX) + BODY(12*4,0x8d2a4c8a,FN2,S24,BX,CX,DX,AX) + + + BODY( 5*4,0xfffa3942,FN3,S31,AX,BX,CX,DX) + BODY( 8*4,0x8771f681,FN3,S32,DX,AX,BX,CX) + BODY(11*4,0x6d9d6122,FN3,S33,CX,DX,AX,BX) + BODY(14*4,0xfde5380c,FN3,S34,BX,CX,DX,AX) + + BODY( 1*4,0xa4beea44,FN3,S31,AX,BX,CX,DX) + BODY( 4*4,0x4bdecfa9,FN3,S32,DX,AX,BX,CX) + BODY( 7*4,0xf6bb4b60,FN3,S33,CX,DX,AX,BX) + BODY(10*4,0xbebfbc70,FN3,S34,BX,CX,DX,AX) + + BODY(13*4,0x289b7ec6,FN3,S31,AX,BX,CX,DX) + BODY( 0*4,0xeaa127fa,FN3,S32,DX,AX,BX,CX) + BODY( 3*4,0xd4ef3085,FN3,S33,CX,DX,AX,BX) + BODY( 6*4,0x04881d05,FN3,S34,BX,CX,DX,AX) + + BODY( 9*4,0xd9d4d039,FN3,S31,AX,BX,CX,DX) + BODY(12*4,0xe6db99e5,FN3,S32,DX,AX,BX,CX) + BODY(15*4,0x1fa27cf8,FN3,S33,CX,DX,AX,BX) + BODY( 2*4,0xc4ac5665,FN3,S34,BX,CX,DX,AX) + + + BODY( 0*4,0xf4292244,FN4,S41,AX,BX,CX,DX) + BODY( 7*4,0x432aff97,FN4,S42,DX,AX,BX,CX) + BODY(14*4,0xab9423a7,FN4,S43,CX,DX,AX,BX) + BODY( 5*4,0xfc93a039,FN4,S44,BX,CX,DX,AX) + + BODY(12*4,0x655b59c3,FN4,S41,AX,BX,CX,DX) + BODY( 3*4,0x8f0ccc92,FN4,S42,DX,AX,BX,CX) + BODY(10*4,0xffeff47d,FN4,S43,CX,DX,AX,BX) + BODY( 1*4,0x85845dd1,FN4,S44,BX,CX,DX,AX) + + BODY( 8*4,0x6fa87e4f,FN4,S41,AX,BX,CX,DX) + BODY(15*4,0xfe2ce6e0,FN4,S42,DX,AX,BX,CX) + BODY( 6*4,0xa3014314,FN4,S43,CX,DX,AX,BX) + BODY(13*4,0x4e0811a1,FN4,S44,BX,CX,DX,AX) + + BODY( 4*4,0xf7537e82,FN4,S41,AX,BX,CX,DX) + BODY(11*4,0xbd3af235,FN4,S42,DX,AX,BX,CX) + BODY( 2*4,0x2ad7d2bb,FN4,S43,CX,DX,AX,BX) + BODY( 9*4,0xeb86d391,FN4,S44,BX,CX,DX,AX) + + ADDQ $(16*4),BP + MOVQ state+STATE(FP),DI + ADDL AX,0(DI) + ADDL BX,4(DI) + ADDL CX,8(DI) + ADDL DX,12(DI) + + CMPQ BP,R8 + JCS mainloop + + RET diff --git a/sys/src/libsec/amd64/mkfile b/sys/src/libsec/amd64/mkfile new file mode 100644 index 000000000..f7948cad9 --- /dev/null +++ b/sys/src/libsec/amd64/mkfile @@ -0,0 +1,19 @@ +objtype=amd64 +</$objtype/mkfile + +LIB=/$objtype/lib/libsec.a +FILES=\ + md5block\ + sha1block\ + +HFILES=/sys/include/libsec.h + +SFILES=${FILES:%=%.s} + +OFILES=${FILES:%=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +</sys/src/cmd/mksyslib diff --git a/sys/src/libsec/amd64/sha1block.s b/sys/src/libsec/amd64/sha1block.s new file mode 100644 index 000000000..d8283fb38 --- /dev/null +++ b/sys/src/libsec/amd64/sha1block.s @@ -0,0 +1,197 @@ +/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1; + * wp[off] = x; + * x += A <<< 5; + * E += 0xca62c1d6 + x; + * x = FN(B,C,D); + * E += x; + * B >>> 2 + */ +#define BSWAPDI BYTE $0x0f; BYTE $0xcf; + +#define BODY(off,FN,V,A,B,C,D,E)\ + MOVL (off-64)(BP),DI;\ + XORL (off-56)(BP),DI;\ + XORL (off-32)(BP),DI;\ + XORL (off-12)(BP),DI;\ + ROLL $1,DI;\ + MOVL DI,off(BP);\ + LEAL V(DI)(E*1),E;\ + MOVL A,DI;\ + ROLL $5,DI;\ + ADDL DI,E;\ + FN(B,C,D)\ + ADDL DI,E;\ + RORL $2,B;\ + +#define BODY0(off,FN,V,A,B,C,D,E)\ + MOVLQZX off(BX),DI;\ + BSWAPDI;\ + MOVL DI,off(BP);\ + LEAL V(DI)(E*1),E;\ + MOVL A,DI;\ + ROLL $5,DI;\ + ADDL DI,E;\ + FN(B,C,D)\ + ADDL DI,E;\ + RORL $2,B;\ + +/* + * fn1 = (((C^D)&B)^D); + */ +#define FN1(B,C,D)\ + MOVL C,DI;\ + XORL D,DI;\ + ANDL B,DI;\ + XORL D,DI;\ + +/* + * fn24 = B ^ C ^ D + */ +#define FN24(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL D,DI;\ + +/* + * fn3 = ((B ^ C) & (D ^= B)) ^ B + * D ^= B to restore D + */ +#define FN3(B,C,D)\ + MOVL B,DI;\ + XORL C,DI;\ + XORL B,D;\ + ANDL D,DI;\ + XORL B,DI;\ + XORL B,D;\ + +/* + * stack offsets + * void sha1block(uchar *DATA, int LEN, ulong *STATE) + */ +#define DATA 0 +#define LEN 8 +#define STATE 16 + +/* + * stack offsets for locals + * ulong w[80]; + * uchar *edata; + * ulong *w15, *w40, *w60, *w80; + * register local + * ulong *wp = BP + * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi + * ulong tmp = edi + */ +#define Rpdata R8 +#define WARRAY (-8-(80*4)) +#define TMP1 (-16-(80*4)) +#define TMP2 (-24-(80*4)) +#define W15 (-32-(80*4)) +#define W40 (-40-(80*4)) +#define W60 (-48-(80*4)) +#define W80 (-56-(80*4)) +#define EDATA (-64-(80*4)) + +TEXT _sha1block+0(SB),$384 + + MOVQ RARG, Rpdata + MOVLQZX len+LEN(FP),BX + ADDQ BX, RARG + MOVQ RARG,edata+EDATA(SP) + + LEAQ aw15+(WARRAY+15*4)(SP),DI + MOVQ DI,w15+W15(SP) + LEAQ aw40+(WARRAY+40*4)(SP),DX + MOVQ DX,w40+W40(SP) + LEAQ aw60+(WARRAY+60*4)(SP),CX + MOVQ CX,w60+W60(SP) + LEAQ aw80+(WARRAY+80*4)(SP),DI + MOVQ DI,w80+W80(SP) + +mainloop: + LEAQ warray+WARRAY(SP),BP + + MOVQ state+STATE(FP),DI + MOVL (DI),AX + MOVL 4(DI),BX + MOVL BX,tmp1+TMP1(SP) + MOVL 8(DI),CX + MOVL 12(DI),DX + MOVL 16(DI),SI + + MOVQ Rpdata,BX + +loop1: + BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI) + MOVL SI,tmp2+TMP2(SP) + BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX) + MOVL tmp1+TMP1(SP),SI + BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX) + BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI) + MOVL SI,tmp1+TMP1(SP) + BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX) + MOVL tmp2+TMP2(SP),SI + + ADDQ $20,BX + ADDQ $20,BP + CMPQ BP,w15+W15(SP) + JCS loop1 + + BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI) + ADDQ $4,BX + MOVQ BX,R8 + MOVQ tmp1+TMP1(SP),BX + + BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX) + BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX) + BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX) + BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX) + + ADDQ $20,BP + +loop2: + BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI) + BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX) + BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX) + BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX) + BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w40+W40(SP) + JCS loop2 + +loop3: + BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI) + BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX) + BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX) + BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX) + BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w60+W60(SP) + JCS loop3 + +loop4: + BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI) + BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX) + BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX) + BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX) + BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX) + + ADDQ $20,BP + CMPQ BP,w80+W80(SP) + JCS loop4 + + MOVQ state+STATE(FP),DI + ADDL AX,0(DI) + ADDL BX,4(DI) + ADDL CX,8(DI) + ADDL DX,12(DI) + ADDL SI,16(DI) + + MOVQ edata+EDATA(SP),DI + CMPQ Rpdata,DI + JCS mainloop + + RET + END |