diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-02-01 10:31:41 +0100 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2014-02-01 10:31:41 +0100 |
commit | ed9e9f98e9cc502c72b27c68612e9e187ec11e10 (patch) | |
tree | 42901d0156503dadce4bf1f0b60e9ef850c3c5e0 /sys/src/libmp | |
parent | d4fb753c9c90e0ca745a1b3708ad3ec4ca523e71 (diff) |
libc and ape support for amd64
Diffstat (limited to 'sys/src/libmp')
-rw-r--r-- | sys/src/libmp/amd64/mkfile | 20 | ||||
-rw-r--r-- | sys/src/libmp/amd64/mpdigdiv.s | 21 | ||||
-rw-r--r-- | sys/src/libmp/amd64/mpvecadd.s | 54 | ||||
-rw-r--r-- | sys/src/libmp/amd64/mpvecdigmuladd.s | 53 | ||||
-rw-r--r-- | sys/src/libmp/amd64/mpvecdigmulsub.s | 53 | ||||
-rw-r--r-- | sys/src/libmp/amd64/mpvecsub.s | 45 |
6 files changed, 246 insertions, 0 deletions
diff --git a/sys/src/libmp/amd64/mkfile b/sys/src/libmp/amd64/mkfile new file mode 100644 index 000000000..c9ecdb003 --- /dev/null +++ b/sys/src/libmp/amd64/mkfile @@ -0,0 +1,20 @@ +objtype=amd64 +</$objtype/mkfile + +LIB=/$objtype/lib/libmp.a +SFILES=\ + mpdigdiv.s\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ + +HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +</sys/src/cmd/mksyslib diff --git a/sys/src/libmp/amd64/mpdigdiv.s b/sys/src/libmp/amd64/mpdigdiv.s new file mode 100644 index 000000000..6025d141a --- /dev/null +++ b/sys/src/libmp/amd64/mpdigdiv.s @@ -0,0 +1,21 @@ +TEXT mpdigdiv(SB),$0 + +/* MOVL dividend+0(FP),BX */ + MOVL 0(RARG),AX + MOVL 4(RARG),DX + MOVL divisor+8(FP),BX + MOVQ quotient+16(FP),DI + XORL CX,CX + CMPL DX,BX /* dividend >= 2^32 * divisor */ + JHS _divovfl + CMPL BX,CX /* divisor == 0 */ + JE _divovfl + DIVL BX /* AX = DX:AX/BX */ + MOVL AX,0(DI) + RET + + /* return all 1's */ +_divovfl: + NOTL CX + MOVL CX,0(DI) + RET diff --git a/sys/src/libmp/amd64/mpvecadd.s b/sys/src/libmp/amd64/mpvecadd.s new file mode 100644 index 000000000..326f39dad --- /dev/null +++ b/sys/src/libmp/amd64/mpvecadd.s @@ -0,0 +1,54 @@ +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + */ +TEXT mpvecadd(SB),$0 + + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX +/* MOVL a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + SUBL CX,DX + MOVQ sum+32(FP),DI + XORL BP,BP /* this also sets carry to 0 */ + + /* skip addition if b is zero */ + TESTL CX,CX + JZ _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + MOVL (SI)(BP*4), AX + ADCL (BX)(BP*4), AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop1 + +_add1: + /* jump if alen > blen */ + INCL DX + MOVL DX,CX + LOOP _addloop2 + + /* sum[alen] = carry */ +_addend: + JC 
_addcarry + MOVL $0,(DI)(BP*4) + RET +_addcarry: + MOVL $1,(DI)(BP*4) + RET + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */ +_addloop2: + MOVL (SI)(BP*4),AX + ADCL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop2 + JMP _addend + diff --git a/sys/src/libmp/amd64/mpvecdigmuladd.s b/sys/src/libmp/amd64/mpvecdigmuladd.s new file mode 100644 index 000000000..6599a42c8 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecdigmuladd.s @@ -0,0 +1,53 @@ +/* + * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b+n = SI - can't be BP + * p+n = DI - can't be BP + * i-n = BP + * m = BX + * oldhi = CX + * + */ +TEXT mpvecdigmuladd(SB),$0 + +/* MOVQ b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + MOVL CX,BP + NEGQ BP /* BP = -n */ + SHLL $2,CX + ADDQ CX,SI /* SI = b + n */ + ADDQ CX,DI /* DI = p + n */ + XORL CX,CX +_muladdloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL CX,AX /* lo += oldhi */ + JCC _muladdnocarry1 + INCL DX /* hi += carry */ +_muladdnocarry1: + ADDL AX,(DI)(BP*4) /* p[i] += lo */ + JCC _muladdnocarry2 + INCL DX /* hi += carry */ +_muladdnocarry2: + MOVL DX,CX /* oldhi = hi */ + INCQ BP /* i++ */ + JNZ _muladdloop + XORL AX,AX + ADDL CX,(DI)(BP*4) /* p[n] + oldhi */ + ADCL AX,AX /* return carry out of p[n] */ + RET diff --git a/sys/src/libmp/amd64/mpvecdigmulsub.s b/sys/src/libmp/amd64/mpvecdigmulsub.s new file mode 100644 index 000000000..0b5a35761 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecdigmulsub.s @@ -0,0 +1,53 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] -= lo + * oldhi = hi + * + * the registers are: 
+ * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b = SI - can't be BP + * p = DI - can't be BP + * i = BP + * n = CX - constrained by LOOP instr + * m = BX + * oldhi = R8 + * + */ +TEXT mpvecdigmulsub(SB),$0 + +/* MOVL b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + XORL BP,BP + MOVL BP,R8 +_mulsubloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL R8,AX /* lo += oldhi */ + JCC _mulsubnocarry1 + INCL DX /* hi += carry */ +_mulsubnocarry1: + SUBL AX,(DI)(BP*4) + JCC _mulsubnocarry2 + INCL DX /* hi += carry */ +_mulsubnocarry2: + MOVL DX,R8 + INCL BP + LOOP _mulsubloop + SUBL R8,(DI)(BP*4) + JCC _mulsubnocarry3 + MOVQ $-1,AX + RET +_mulsubnocarry3: + MOVQ $1,AX + RET diff --git a/sys/src/libmp/amd64/mpvecsub.s b/sys/src/libmp/amd64/mpvecsub.s new file mode 100644 index 000000000..9e1b53494 --- /dev/null +++ b/sys/src/libmp/amd64/mpvecsub.s @@ -0,0 +1,45 @@ +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + */ +TEXT mpvecsub(SB),$0 + +/* MOVQ a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX + MOVQ diff+32(FP),DI + SUBL CX,DX + XORL BP,BP /* this also sets carry to 0 */ + + /* skip subtraction if b is zero */ + TESTL CX,CX + JZ _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + MOVL (SI)(BP*4),AX + SBBL (BX)(BP*4),AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop1 + +_sub1: + INCL DX + MOVL DX,CX + LOOP _subloop2 + RET + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 */ +_subloop2: + MOVL (SI)(BP*4),AX + SBBL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop2 + RET + |