summaryrefslogtreecommitdiff
path: root/sys/src/libmp
diff options
context:
space:
mode:
authorcinap_lenrek <cinap_lenrek@felloff.net>2014-02-01 10:31:41 +0100
committercinap_lenrek <cinap_lenrek@felloff.net>2014-02-01 10:31:41 +0100
commited9e9f98e9cc502c72b27c68612e9e187ec11e10 (patch)
tree42901d0156503dadce4bf1f0b60e9ef850c3c5e0 /sys/src/libmp
parentd4fb753c9c90e0ca745a1b3708ad3ec4ca523e71 (diff)
libc and ape support for amd64
Diffstat (limited to 'sys/src/libmp')
-rw-r--r--sys/src/libmp/amd64/mkfile20
-rw-r--r--sys/src/libmp/amd64/mpdigdiv.s21
-rw-r--r--sys/src/libmp/amd64/mpvecadd.s54
-rw-r--r--sys/src/libmp/amd64/mpvecdigmuladd.s53
-rw-r--r--sys/src/libmp/amd64/mpvecdigmulsub.s53
-rw-r--r--sys/src/libmp/amd64/mpvecsub.s45
6 files changed, 246 insertions, 0 deletions
diff --git a/sys/src/libmp/amd64/mkfile b/sys/src/libmp/amd64/mkfile
new file mode 100644
index 000000000..c9ecdb003
--- /dev/null
+++ b/sys/src/libmp/amd64/mkfile
@@ -0,0 +1,20 @@
+# Plan 9 mkfile: assembles the amd64 bignum primitives below and
+# archives them into /$objtype/lib/libmp.a via the standard
+# mksyslib recipe included at the bottom.
+objtype=amd64
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libmp.a
+SFILES=\
+	mpdigdiv.s\
+	mpvecadd.s\
+	mpvecdigmuladd.s\
+	mpvecdigmulsub.s\
+	mpvecsub.s\
+
+HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
+
+OFILES=${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$SFILES\
+
+</sys/src/cmd/mksyslib
diff --git a/sys/src/libmp/amd64/mpdigdiv.s b/sys/src/libmp/amd64/mpdigdiv.s
new file mode 100644
index 000000000..6025d141a
--- /dev/null
+++ b/sys/src/libmp/amd64/mpdigdiv.s
@@ -0,0 +1,21 @@
+/*
+ * mpdigdiv(mpdigit *dividend, mpdigit divisor, mpdigit *quotient)
+ *
+ * *quotient = (dividend[0] + 2^32*dividend[1]) / divisor
+ *
+ * If the quotient does not fit in one 32-bit digit
+ * (dividend[1] >= divisor) or divisor is zero, *quotient is
+ * set to all 1's instead of trapping in DIVL.
+ *
+ * Plan 9 amd64 ABI: the first argument arrives in RARG; the
+ * remaining arguments are fetched from 8-byte frame slots.
+ */
+TEXT mpdigdiv(SB),$0
+
+/* MOVL dividend+0(FP),BX */
+	MOVL 0(RARG),AX		/* AX = dividend[0] (low digit) */
+	MOVL 4(RARG),DX		/* DX = dividend[1] (high digit) */
+	MOVL divisor+8(FP),BX
+	MOVQ quotient+16(FP),DI
+	XORL CX,CX		/* CX = 0: compare source and ~0 seed */
+	CMPL DX,BX	/* dividend >= 2^32 * divisor */
+	JHS _divovfl
+	CMPL BX,CX	/* divisor == 0 */
+	JE _divovfl
+	DIVL BX	/* AX = DX:AX/BX */
+	MOVL AX,0(DI)
+	RET
+
+	/* return all 1's */
+_divovfl:
+	NOTL CX
+	MOVL CX,0(DI)
+	RET
diff --git a/sys/src/libmp/amd64/mpvecadd.s b/sys/src/libmp/amd64/mpvecadd.s
new file mode 100644
index 000000000..326f39dad
--- /dev/null
+++ b/sys/src/libmp/amd64/mpvecadd.s
@@ -0,0 +1,54 @@
+/*
+ * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum)
+ *
+ * sum[0:alen] = a[0:alen-1] + b[0:blen-1]
+ *
+ * prereq: alen >= blen, sum has room for alen+1 digits
+ *
+ * The carry flag is threaded through both loops: INCL does not
+ * modify CF and LOOP modifies no flags at all, so the ADCL chain
+ * survives from one iteration to the next.
+ */
+TEXT mpvecadd(SB),$0
+
+	MOVL alen+8(FP),DX
+	MOVL blen+24(FP),CX
+/* MOVL a+0(FP),SI */
+	MOVQ RARG, SI		/* first argument (a) arrives in RARG */
+	MOVQ b+16(FP),BX
+	SUBL CX,DX		/* DX = alen - blen */
+	MOVQ sum+32(FP),DI
+	XORL BP,BP /* this also sets carry to 0 */
+
+	/* skip addition if b is zero (TESTL also leaves CF = 0) */
+	TESTL CX,CX
+	JZ _add1
+
+	/* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */
+_addloop1:
+	MOVL (SI)(BP*4), AX
+	ADCL (BX)(BP*4), AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP			/* preserves CF */
+	LOOP _addloop1
+
+_add1:
+	/* jump if alen > blen: INCL+LOOP is "if(--(DX+1) != 0)" */
+	INCL DX
+	MOVL DX,CX
+	LOOP _addloop2
+
+	/* sum[alen] = carry */
+_addend:
+	JC _addcarry
+	MOVL $0,(DI)(BP*4)
+	RET
+_addcarry:
+	MOVL $1,(DI)(BP*4)
+	RET
+
+	/* sum[blen:alen-1],carry = a[blen:alen-1] + 0 (propagate carry) */
+_addloop2:
+	MOVL (SI)(BP*4),AX
+	ADCL $0,AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _addloop2
+	JMP _addend
+
diff --git a/sys/src/libmp/amd64/mpvecdigmuladd.s b/sys/src/libmp/amd64/mpvecdigmuladd.s
new file mode 100644
index 000000000..6599a42c8
--- /dev/null
+++ b/sys/src/libmp/amd64/mpvecdigmuladd.s
@@ -0,0 +1,53 @@
+/*
+ * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ * p += b*m
+ *
+ * returns the carry (0 or 1) out of p[n]
+ *
+ * each step looks like:
+ *	hi,lo = m*b[i]
+ *	lo += oldhi + carry
+ *	hi += carry
+ *	p[i] += lo
+ *	oldhi = hi
+ *
+ * the registers are:
+ *	hi = DX		- constrained by hardware
+ *	lo = AX		- constrained by hardware
+ *	b+n = SI	- can't be BP
+ *	p+n = DI	- can't be BP
+ *	i-n = BP	- counts up from -n to 0
+ *	m = BX
+ *	oldhi = CX
+ *
+ */
+TEXT mpvecdigmuladd(SB),$0
+
+/* MOVQ b+0(FP),SI */
+	MOVQ RARG,SI		/* first argument (b) arrives in RARG */
+	MOVL n+8(FP),CX
+	MOVL m+16(FP),BX
+	MOVQ p+24(FP),DI
+	MOVL CX,BP
+	NEGQ BP /* BP = -n */
+	SHLL $2,CX		/* CX = n * sizeof(mpdigit) */
+	ADDQ CX,SI /* SI = b + n */
+	ADDQ CX,DI /* DI = p + n */
+	XORL CX,CX		/* oldhi = 0 */
+_muladdloop:
+	MOVL (SI)(BP*4),AX /* lo = b[i] */
+	MULL BX /* hi, lo = b[i] * m */
+	ADDL CX,AX /* lo += oldhi */
+	JCC _muladdnocarry1	/* JCC = jump if carry clear */
+	INCL DX /* hi += carry */
+_muladdnocarry1:
+	ADDL AX,(DI)(BP*4) /* p[i] += lo */
+	JCC _muladdnocarry2
+	INCL DX /* hi += carry */
+_muladdnocarry2:
+	MOVL DX,CX /* oldhi = hi */
+	INCQ BP /* i++ */
+	JNZ _muladdloop		/* until BP reaches 0, i.e. i == n */
+	XORL AX,AX		/* AX = 0; also clears CF for ADCL below */
+	ADDL CX,(DI)(BP*4) /* p[n] += oldhi */
+	ADCL AX,AX /* return carry out of p[n] */
+	RET
diff --git a/sys/src/libmp/amd64/mpvecdigmulsub.s b/sys/src/libmp/amd64/mpvecdigmulsub.s
new file mode 100644
index 000000000..0b5a35761
--- /dev/null
+++ b/sys/src/libmp/amd64/mpvecdigmulsub.s
@@ -0,0 +1,53 @@
+/*
+ * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ * p -= b*m
+ *
+ * returns 1 if there was no borrow out of p[n], -1 if there was
+ *
+ * each step looks like:
+ *	hi,lo = m*b[i]
+ *	lo += oldhi + carry
+ *	hi += carry
+ *	p[i] -= lo
+ *	oldhi = hi
+ *
+ * the registers are:
+ *	hi = DX		- constrained by hardware
+ *	lo = AX		- constrained by hardware
+ *	b = SI		- can't be BP
+ *	p = DI		- can't be BP
+ *	i = BP		- counts up from 0 to n
+ *	n = CX		- constrained by LOOP instr
+ *	m = BX
+ *	oldhi = R8
+ *
+ */
+TEXT mpvecdigmulsub(SB),$0
+
+/* MOVL b+0(FP),SI */
+	MOVQ RARG,SI		/* first argument (b) arrives in RARG */
+	MOVL n+8(FP),CX
+	MOVL m+16(FP),BX
+	MOVQ p+24(FP),DI
+	XORL BP,BP		/* i = 0 */
+	MOVL BP,R8		/* oldhi = 0 */
+_mulsubloop:
+	MOVL (SI)(BP*4),AX /* lo = b[i] */
+	MULL BX /* hi, lo = b[i] * m */
+	ADDL R8,AX /* lo += oldhi */
+	JCC _mulsubnocarry1	/* JCC = jump if carry clear */
+	INCL DX /* hi += carry */
+_mulsubnocarry1:
+	SUBL AX,(DI)(BP*4)	/* p[i] -= lo */
+	JCC _mulsubnocarry2	/* borrow from the subtraction? */
+	INCL DX /* hi += carry */
+_mulsubnocarry2:
+	MOVL DX,R8		/* oldhi = hi */
+	INCL BP			/* i++; preserves CF (unused here) */
+	LOOP _mulsubloop
+	SUBL R8,(DI)(BP*4)	/* p[n] -= oldhi; BP == n here */
+	JCC _mulsubnocarry3
+	MOVQ $-1,AX		/* borrow out of p[n] */
+	RET
+_mulsubnocarry3:
+	MOVQ $1,AX		/* no borrow */
+	RET
diff --git a/sys/src/libmp/amd64/mpvecsub.s b/sys/src/libmp/amd64/mpvecsub.s
new file mode 100644
index 000000000..9e1b53494
--- /dev/null
+++ b/sys/src/libmp/amd64/mpvecsub.s
@@ -0,0 +1,45 @@
+/*
+ * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff)
+ *
+ * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1]
+ *
+ * prereq: alen >= blen, diff has room for alen digits
+ *
+ * The borrow flag is threaded through both loops: INCL does not
+ * modify CF and LOOP modifies no flags, so the SBBL chain survives
+ * from one iteration to the next.  Unlike mpvecadd, the final
+ * borrow out of diff[alen-1] is simply dropped.
+ */
+TEXT mpvecsub(SB),$0
+
+/* MOVQ a+0(FP),SI */
+	MOVQ RARG, SI		/* first argument (a) arrives in RARG */
+	MOVQ b+16(FP),BX
+	MOVL alen+8(FP),DX
+	MOVL blen+24(FP),CX
+	MOVQ diff+32(FP),DI
+	SUBL CX,DX		/* DX = alen - blen */
+	XORL BP,BP /* this also sets carry to 0 */
+
+	/* skip subtraction if b is zero (TESTL also leaves CF = 0) */
+	TESTL CX,CX
+	JZ _sub1
+
+	/* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */
+_subloop1:
+	MOVL (SI)(BP*4),AX
+	SBBL (BX)(BP*4),AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP			/* preserves CF */
+	LOOP _subloop1
+
+_sub1:
+	/* jump if alen > blen: INCL+LOOP is "if(--(DX+1) != 0)" */
+	INCL DX
+	MOVL DX,CX
+	LOOP _subloop2
+	RET
+
+	/* diff[blen:alen-1] = a[blen:alen-1] - 0 (propagate borrow) */
+_subloop2:
+	MOVL (SI)(BP*4),AX
+	SBBL $0,AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _subloop2
+	RET
+