author    Taru Karttunen <taruti@taruti.net>    2011-03-30 15:46:40 +0300
committer Taru Karttunen <taruti@taruti.net>    2011-03-30 15:46:40 +0300
commit    e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree      d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/ap/power
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/power')
-rwxr-xr-x  sys/src/ape/lib/ap/power/cycles.s      17
-rwxr-xr-x  sys/src/ape/lib/ap/power/getfcr.s      28
-rwxr-xr-x  sys/src/ape/lib/ap/power/lock.c        45
-rwxr-xr-x  sys/src/ape/lib/ap/power/main9.s       14
-rwxr-xr-x  sys/src/ape/lib/ap/power/main9p.s      46
-rwxr-xr-x  sys/src/ape/lib/ap/power/memcmp.s     110
-rwxr-xr-x  sys/src/ape/lib/ap/power/memmove.s    170
-rwxr-xr-x  sys/src/ape/lib/ap/power/memset.s      73
-rwxr-xr-x  sys/src/ape/lib/ap/power/mkfile        23
-rwxr-xr-x  sys/src/ape/lib/ap/power/notetramp.c   72
-rwxr-xr-x  sys/src/ape/lib/ap/power/setjmp.s      37
-rwxr-xr-x  sys/src/ape/lib/ap/power/strcmp.s      21
-rwxr-xr-x  sys/src/ape/lib/ap/power/tas.s         16
-rwxr-xr-x  sys/src/ape/lib/ap/power/vlop.s       132
-rwxr-xr-x  sys/src/ape/lib/ap/power/vlrt.c       254
15 files changed, 1058 insertions, 0 deletions
diff --git a/sys/src/ape/lib/ap/power/cycles.s b/sys/src/ape/lib/ap/power/cycles.s
new file mode 100755
index 000000000..b4ad52367
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/cycles.s
@@ -0,0 +1,17 @@
+#define TBRL 268
+#define TBRU 269 /* Time base Upper/Lower (Reading) */
+
+/*
+ * time stamp counter; _cycles since power up
+ * Runs at fasthz/4 cycles per second (m->clkin>>3)
+ */
+TEXT _cycles(SB),1,$0
+loop:
+ MOVW SPR(TBRU),R7
+ MOVW SPR(TBRL),R8
+ MOVW SPR(TBRU),R5
+ CMP R5,R7
+ BNE loop
+ MOVW R7,0(R3)
+ MOVW R8,4(R3)
+ RETURN
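
The upper/lower/upper read sequence above guards against the low half of the 64-bit timebase carrying into the high half between the two loads: if TBRU changed, the whole read is retried. A minimal C rendering of the same loop, assuming hypothetical mftbu()/mftbl() accessors standing in for the SPR moves:

	extern unsigned long mftbu(void), mftbl(void);	/* hypothetical SPR accessors */

	unsigned long long
	read_timebase(void)
	{
		unsigned long hi, lo, hi2;

		do {
			hi = mftbu();	/* TBRU: upper 32 bits */
			lo = mftbl();	/* TBRL: lower 32 bits */
			hi2 = mftbu();	/* reread the upper half */
		} while(hi != hi2);	/* TBL wrapped into TBU: retry */
		return ((unsigned long long)hi << 32) | lo;
	}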
diff --git a/sys/src/ape/lib/ap/power/getfcr.s b/sys/src/ape/lib/ap/power/getfcr.s
new file mode 100755
index 000000000..b61d52e68
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/getfcr.s
@@ -0,0 +1,28 @@
+TEXT getfcr(SB), $8
+ MOVFL FPSCR, F3
+ FMOVD F3, f-8(SP)
+ MOVW -4(SP), R3
+ RETURN
+
+TEXT getfsr(SB), $8
+ MOVFL FPSCR, F3
+ FMOVD F3, f-8(SP)
+ MOVW -4(SP), R3
+ RETURN
+
+TEXT setfcr(SB), $8
+ SYNC
+ MOVW R3, -4(SP)
+ FMOVD -8(SP), F3
+ MOVFL F3, FPSCR
+ ISYNC
+ RETURN
+
+TEXT setfsr(SB), $8
+ SYNC
+ MOVW R3, -4(SP)
+ FMOVD -8(SP), F3
+ MOVFL F3, FPSCR
+ ISYNC
+ RETURN
+
diff --git a/sys/src/ape/lib/ap/power/lock.c b/sys/src/ape/lib/ap/power/lock.c
new file mode 100755
index 000000000..0d17c34d5
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/lock.c
@@ -0,0 +1,45 @@
+#include "../plan9/lib.h"
+#include "../plan9/sys9.h"
+#define _LOCK_EXTENSION
+#include <lock.h>
+
+int tas(int*);
+
+void
+lock(Lock *lk)
+{
+ int i;
+
+ /* once fast */
+ if(!tas(&lk->val))
+ return;
+ /* a thousand times pretty fast */
+ for(i=0; i<1000; i++){
+ if(!tas(&lk->val))
+ return;
+ _SLEEP(0);
+ }
+ /* now nice and slow */
+ for(i=0; i<1000; i++){
+ if(!tas(&lk->val))
+ return;
+ _SLEEP(100);
+ }
+ /* take your time */
+ while(tas(&lk->val))
+ _SLEEP(1000);
+}
+
+int
+canlock(Lock *lk)
+{
+ if(tas(&lk->val))
+ return 0;
+ return 1;
+}
+
+void
+unlock(Lock *lk)
+{
+ lk->val = 0;
+}
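
The ladder above acquires quickly in the uncontended case and backs off through progressively longer sleeps under contention. Usage is the ordinary spin-lock pattern; a minimal sketch (the counter and names are illustrative, compiled under APE with _LOCK_EXTENSION defined as in this file):

	#include <lock.h>

	static Lock lk;
	static int counter;

	void
	increment(void)
	{
		lock(&lk);	/* spins, then backs off via _SLEEP */
		counter++;
		unlock(&lk);
	}

canlock() is the non-blocking variant: it returns 1 if it acquired the lock and 0 otherwise.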
diff --git a/sys/src/ape/lib/ap/power/main9.s b/sys/src/ape/lib/ap/power/main9.s
new file mode 100755
index 000000000..ee493f5a8
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/main9.s
@@ -0,0 +1,14 @@
+TEXT _main(SB), 1, $16
+
+ MOVW $setSB(SB), R2
+
+ BL _envsetup(SB)
+ MOVW inargc-4(FP), R3
+ MOVW $inargv+0(FP), R4
+ MOVW R3, 4(R1)
+ MOVW R4, 8(R1)
+ BL main(SB)
+loop:
+ MOVW R3, 4(R1)
+ BL exit(SB)
+ BR loop
diff --git a/sys/src/ape/lib/ap/power/main9p.s b/sys/src/ape/lib/ap/power/main9p.s
new file mode 100755
index 000000000..865616980
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/main9p.s
@@ -0,0 +1,46 @@
+#define NPRIVATES 16
+
+GLOBL _tos(SB), $4
+GLOBL _privates(SB), $4
+GLOBL _nprivates(SB), $4
+
+TEXT _mainp(SB), 1, $(3*4+NPRIVATES*4)
+
+ MOVW $setSB(SB), R2
+
+ /* _tos = arg */
+ MOVW R3, _tos(SB)
+ MOVW $8(SP), R1
+ MOVW R1, _privates(SB)
+ MOVW $NPRIVATES, R1
+ MOVW R1, _nprivates(SB)
+
+ /* _profmain(); */
+ BL _envsetup(SB)
+
+ /* _tos->prof.pp = _tos->prof.next; */
+ MOVW _tos+0(SB),R1
+ MOVW 4(R1),R2
+ MOVW R2,(R1)
+
+ /* main(argc, argv, environ); */
+ MOVW inargc-4(FP), R3
+ MOVW $inargv+0(FP), R4
+ MOVW environ(SB), R5
+ MOVW R3, 4(R1)
+ MOVW R4, 8(R1)
+ MOVW R5, 12(R1)
+ BL main(SB)
+loop:
+ MOVW R3, 4(R1)
+ BL exit(SB)
+ MOVW $_profin(SB), R4 /* force loading of profile */
+ BR loop
+
+TEXT _savearg(SB), 1, $0
+ RETURN
+
+TEXT _callpc(SB), 1, $0
+ MOVW argp+0(FP), R3
+ MOVW 4(R3), R3
+ RETURN
diff --git a/sys/src/ape/lib/ap/power/memcmp.s b/sys/src/ape/lib/ap/power/memcmp.s
new file mode 100755
index 000000000..f524fa9d3
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/memcmp.s
@@ -0,0 +1,110 @@
+ TEXT memcmp(SB), $0
+#define BDNZ BC 16,0,
+ MOVW R3, s1+0(FP) /* R3 is pointer1 */
+
+/*
+ * performance:
+ * 67mb/sec aligned; 16mb/sec unaligned
+ */
+
+ MOVW n+8(FP), R4 /* R4 is count */
+ MOVW s2+4(FP), R5 /* R5 is pointer2 */
+
+/*
+ * let LSW do the work for 4 characters or less; aligned and unaligned
+ */
+ CMP R4, $0
+ BLE eq
+ CMP R4, $4
+ BLE out
+
+ XOR R3, R5, R9
+ ANDCC $3, R9
+ BNE l4 /* pointers misaligned; use LSW loop */
+
+/*
+ * do enough bytes to align pointers
+ */
+ ANDCC $3,R3, R9
+ BEQ l2
+ SUBC R9, $4, R9
+ MOVW R9, XER
+ LSW (R3), R10
+ ADD R9, R3
+ LSW (R5), R14
+ ADD R9, R5
+ SUB R9, R4
+ CMPU R10, R14
+ BNE ne
+
+/*
+ * compare 16 at a time
+ */
+l2:
+ SRAWCC $4, R4, R9
+ BLE l4
+ MOVW R9, CTR
+ SUB $4, R3
+ SUB $4, R5
+l3:
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ CMPU R11, R13
+ BNE ne
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ CMPU R11, R13
+ BNE ne
+ BDNZ l3
+ ADD $4, R3
+ ADD $4, R5
+ RLWNMCC $0, R4, $15, R4 /* residue */
+ BEQ eq
+
+/*
+ * do remaining words with LSW; also does unaligned case
+ */
+l4:
+ SRAWCC $2, R4, R9
+ BLE out
+ MOVW R9, CTR
+l5:
+ LSW (R3), $4, R10
+ ADD $4, R3
+ LSW (R5), $4, R11
+ ADD $4, R5
+ CMPU R10, R11
+ BNE ne
+ BDNZ l5
+ RLWNMCC $0, R4, $3, R4 /* residue */
+ BEQ eq
+
+/*
+ * do remaining bytes with final LSW
+ */
+out:
+ MOVW R4, XER
+ LSW (R3), R10
+ LSW (R5), R11
+ CMPU R10, R11
+ BNE ne
+
+eq:
+ MOVW $0, R3
+ RETURN
+
+ne:
+ MOVW $1, R3
+ BGE ret
+ MOVW $-1,R3
+ret:
+ RETURN
+ END
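
The routine compares a word at a time once both pointers share alignment, and uses the load-string (LSW) path for misaligned pointers and residual bytes. Roughly the same structure in portable C, as a sketch only (assumes 32-bit unsigned int; on a word mismatch it simply drops to the byte loop to locate the differing byte, where the big-endian assembly can compare the words directly):

	int
	memcmp_sketch(const void *a, const void *b, unsigned long n)
	{
		const unsigned char *p = a, *q = b;

		if((((unsigned long)p ^ (unsigned long)q) & 3) == 0){
			while(n > 0 && ((unsigned long)p & 3) != 0){	/* align pointers */
				if(*p != *q)
					return *p < *q ? -1 : 1;
				p++, q++, n--;
			}
			while(n >= 4 && *(const unsigned int*)p == *(const unsigned int*)q){
				p += 4;		/* bulk word compares */
				q += 4;
				n -= 4;
			}
		}
		for(; n > 0; p++, q++, n--)	/* residue, and the unaligned case */
			if(*p != *q)
				return *p < *q ? -1 : 1;
		return 0;
	}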
diff --git a/sys/src/ape/lib/ap/power/memmove.s b/sys/src/ape/lib/ap/power/memmove.s
new file mode 100755
index 000000000..dd6167d7d
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/memmove.s
@@ -0,0 +1,170 @@
+#define BDNZ BC 16,0,
+ TEXT memmove(SB), $0
+ BR move
+
+ TEXT memcpy(SB), $0
+move:
+
+/*
+ * performance:
+ * (tba)
+ */
+
+ MOVW R3, s1+0(FP)
+ MOVW n+8(FP), R9 /* R9 is count */
+ MOVW R3, R10 /* R10 is to-pointer */
+ CMP R9, $0
+ BEQ ret
+ BLT trap
+ MOVW s2+4(FP), R11 /* R11 is from-pointer */
+
+/*
+ * if no more than 16 bytes, just use one lsw/stsw
+ */
+ CMP R9, $16
+ BLE fout
+
+ ADD R9,R11, R13 /* R13 is end from-pointer */
+ ADD R9,R10, R12 /* R12 is end to-pointer */
+
+/*
+ * easiest test is copy backwards if
+ * destination string has higher mem address
+ */
+ CMPU R10, R11
+ BGT back
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+ XOR R10,R11, R7
+ ANDCC $3,R7
+ BNE fbad
+
+/*
+ * move a few bytes to align pointers
+ */
+ ANDCC $3,R10,R7
+ BEQ f2
+ SUBC R7, $4, R7
+ SUB R7, R9
+ MOVW R7, XER
+ LSW (R11), R16
+ ADD R7, R11
+ STSW R16, (R10)
+ ADD R7, R10
+
+/*
+ * turn R14 into doubleword count
+ * copy 16 bytes at a time while there's room.
+ */
+f2:
+ SRAWCC $4, R9, R14
+ BLE fout
+ MOVW R14, CTR
+ SUB $4, R11
+ SUB $4, R10
+f3:
+ MOVWU 4(R11), R16
+ MOVWU R16, 4(R10)
+ MOVWU 4(R11), R17
+ MOVWU R17, 4(R10)
+ MOVWU 4(R11), R16
+ MOVWU R16, 4(R10)
+ MOVWU 4(R11), R17
+ MOVWU R17, 4(R10)
+ BDNZ f3
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+ ADD $4, R11
+ ADD $4, R10
+
+/*
+ * move up to 16 bytes through R16 .. R19; aligned and unaligned
+ */
+fout:
+ MOVW R9, XER
+ LSW (R11), R16
+ STSW R16, (R10)
+ BR ret
+
+/*
+ * loop for unaligned copy, then copy up to 15 remaining bytes
+ */
+fbad:
+ SRAWCC $4, R9, R14
+ BLE f6
+ MOVW R14, CTR
+f5:
+ LSW (R11), $16, R16
+ ADD $16, R11
+ STSW R16, $16, (R10)
+ ADD $16, R10
+ BDNZ f5
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+f6:
+ MOVW R9, XER
+ LSW (R11), R16
+ STSW R16, (R10)
+ BR ret
+
+/*
+ * whole thing repeated for backwards
+ */
+back:
+ CMP R9, $4
+ BLT bout
+
+ XOR R12,R13, R7
+ ANDCC $3,R7
+ BNE bout
+b1:
+ ANDCC $3,R13, R7
+ BEQ b2
+ MOVBZU -1(R13), R16
+ MOVBZU R16, -1(R12)
+ SUB $1, R9
+ BR b1
+b2:
+ SRAWCC $4, R9, R14
+ BLE b4
+ MOVW R14, CTR
+b3:
+ MOVWU -4(R13), R16
+ MOVWU R16, -4(R12)
+ MOVWU -4(R13), R17
+ MOVWU R17, -4(R12)
+ MOVWU -4(R13), R16
+ MOVWU R16, -4(R12)
+ MOVWU -4(R13), R17
+ MOVWU R17, -4(R12)
+ BDNZ b3
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+b4:
+ SRAWCC $2, R9, R14
+ BLE bout
+ MOVW R14, CTR
+b5:
+ MOVWU -4(R13), R16
+ MOVWU R16, -4(R12)
+ BDNZ b5
+ RLWNMCC $0, R9, $3, R9 /* residue */
+ BEQ ret
+
+bout:
+ CMPU R13, R11
+ BLE ret
+ MOVBZU -1(R13), R16
+ MOVBZU R16, -1(R12)
+ BR bout
+
+trap:
+ MOVW $0, R0
+ MOVW 0(R0), R0
+
+ret:
+ MOVW s1+0(FP), R3
+ RETURN
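
The copy direction hinges on one unsigned compare: when the destination sits above the source, a forward copy through an overlap would clobber bytes before they are read, so the routine runs backwards from the end. The same decision in C, byte-wise for clarity (a sketch, not the block-copy code above):

	void *
	memmove_sketch(void *dst, const void *src, unsigned long n)
	{
		unsigned char *d = dst;
		const unsigned char *s = src;

		if(d > s){
			while(n > 0){		/* backwards from the end */
				n--;
				d[n] = s[n];
			}
		}else{
			while(n-- > 0)		/* forwards */
				*d++ = *s++;
		}
		return dst;
	}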
diff --git a/sys/src/ape/lib/ap/power/memset.s b/sys/src/ape/lib/ap/power/memset.s
new file mode 100755
index 000000000..fa6e8d920
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/memset.s
@@ -0,0 +1,73 @@
+ TEXT memset(SB),$0
+#define BDNZ BC 16,0,
+ MOVW R3, p+0(FP) /* R3 is pointer */
+
+/*
+ * performance:
+ * about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
+ * drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
+ */
+
+ MOVW n+8(FP), R4 /* R4 is count */
+ CMP R4, $0
+ BLE ret
+ MOVW c+4(FP), R5 /* R5 is char */
+
+/*
+ * create 16 copies of c in R5 .. R8
+ */
+ RLWNM $0, R5, $0xff, R5
+ RLWMI $8, R5, $0xff00, R5
+ RLWMI $16, R5, $0xffff0000, R5
+ MOVW R5, R6
+ MOVW R5, R7
+ MOVW R5, R8
+
+/*
+ * let STSW do the work for 16 characters or less; aligned and unaligned
+ */
+ CMP R4, $16
+ BLE out
+
+/*
+ * store enough bytes to align pointer
+ */
+ ANDCC $7,R3, R9
+ BEQ l2
+ SUBC R9, $8, R9
+ MOVW R9, XER
+ STSW R5, (R3)
+ ADD R9, R3
+ SUB R9, R4
+
+/*
+ * store 16 at a time while there's room
+ * STSW was used here originally, but it's `completion serialised'
+ */
+l2:
+ SRAWCC $4, R4, R9
+ BLE out
+ MOVW R9, CTR
+l3:
+ MOVW R5, 0(R3)
+ ADD $8, R3, R10
+ MOVW R6, 4(R3)
+ MOVW R7, 0(R10)
+ ADD $8, R10, R3
+ MOVW R8, 4(R10)
+ BDNZ l3
+ RLWNMCC $0, R4, $15, R4 /* residue */
+ BEQ ret
+
+/*
+ * store up to 16 bytes from R5 .. R8; aligned and unaligned
+ */
+
+out:
+ MOVW R4, XER
+ STSW R5, (R3)
+
+ret:
+ MOVW 0(FP), R3
+ RETURN
+ END
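
The RLWNM/RLWMI pair above splats the fill byte across a 32-bit word before the wide stores begin. The same replication expressed in C (illustrative):

	unsigned long
	replicate(int c)
	{
		unsigned long v;

		v = c & 0xff;	/* 000000cc */
		v |= v << 8;	/* 0000cccc */
		v |= v << 16;	/* cccccccc */
		return v;
	}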
diff --git a/sys/src/ape/lib/ap/power/mkfile b/sys/src/ape/lib/ap/power/mkfile
new file mode 100755
index 000000000..cc546cd84
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/mkfile
@@ -0,0 +1,23 @@
+APE=/sys/src/ape
+<$APE/config
+LIB=/$objtype/lib/ape/libap.a
+OFILES=\
+ cycles.$O\
+ getfcr.$O\
+ lock.$O\
+ main9.$O\
+ main9p.$O\
+ memcmp.$O\
+ memmove.$O\
+ memset.$O\
+ notetramp.$O\
+ setjmp.$O\
+ strcmp.$O\
+ tas.$O\
+ vlop.$O\
+ vlrt.$O\
+
+</sys/src/cmd/mksyslib
+
+CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE
+
diff --git a/sys/src/ape/lib/ap/power/notetramp.c b/sys/src/ape/lib/ap/power/notetramp.c
new file mode 100755
index 000000000..6477e1b14
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/notetramp.c
@@ -0,0 +1,72 @@
+#include "../plan9/lib.h"
+#include "../plan9/sys9.h"
+#include <signal.h>
+#include <setjmp.h>
+
+/* A stack to hold pcs when signals nest */
+#define MAXSIGSTACK 20
+typedef struct Pcstack Pcstack;
+static struct Pcstack {
+ int sig;
+ void (*hdlr)(int, char*, Ureg*);
+ unsigned long restorepc;
+ Ureg *u;
+} pcstack[MAXSIGSTACK];
+static int nstack = 0;
+
+static void notecont(Ureg*, char*);
+
+void
+_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u)
+{
+ Pcstack *p;
+
+ if(nstack >= MAXSIGSTACK)
+ _NOTED(1); /* nesting too deep; just do system default */
+ p = &pcstack[nstack];
+ p->restorepc = u->pc;
+ p->sig = sig;
+ p->hdlr = hdlr;
+ p->u = u;
+ nstack++;
+ u->pc = (unsigned long) notecont;
+ _NOTED(2); /* NSAVE: clear note but hold state */
+}
+
+static void
+notecont(Ureg *u, char *s)
+{
+ Pcstack *p;
+ void(*f)(int, char*, Ureg*);
+
+ p = &pcstack[nstack-1];
+ f = p->hdlr;
+ u->pc = p->restorepc;
+ nstack--;
+ (*f)(p->sig, s, u);
+ _NOTED(3); /* NRSTR */
+}
+
+#define JMPBUFPC 1
+#define JMPBUFSP 0
+
+extern sigset_t _psigblocked;
+
+void
+siglongjmp(sigjmp_buf j, int ret)
+{
+ struct Ureg *u;
+
+ if(j[0])
+ _psigblocked = j[1];
+ if(nstack == 0 || pcstack[nstack-1].u->sp > j[2+JMPBUFSP])
+ longjmp(j+2, ret);
+ u = pcstack[nstack-1].u;
+ nstack--;
+ u->r3 = ret;
+ if(ret == 0)
+ u->r3 = 1;
+ u->pc = j[2+JMPBUFPC];
+ u->sp = j[2+JMPBUFSP];
+ _NOTED(3); /* NRSTR */
+}
diff --git a/sys/src/ape/lib/ap/power/setjmp.s b/sys/src/ape/lib/ap/power/setjmp.s
new file mode 100755
index 000000000..0023afcf2
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/setjmp.s
@@ -0,0 +1,37 @@
+TEXT setjmp(SB), 1, $-4
+ MOVW LR, R4
+ MOVW R1, (R3)
+ MOVW R4, 4(R3)
+ MOVW $0, R3
+ RETURN
+
+TEXT sigsetjmp(SB), 1, $-4
+ MOVW savemask+4(FP), R4
+ MOVW R4, 0(R3)
+ MOVW $_psigblocked(SB), R4
+ MOVW R4, 4(R3)
+ MOVW LR, R4
+ MOVW R1, 8(R3)
+ MOVW R4, 12(R3)
+ MOVW $0, R3
+ RETURN
+
+TEXT longjmp(SB), 1, $-4
+ MOVW R3, R4
+ MOVW r+4(FP), R3
+ CMP R3, $0
+ BNE ok /* ansi: "longjmp(0) => longjmp(1)" */
+ MOVW $1, R3 /* bless their pointed heads */
+ok: MOVW (R4), R1
+ MOVW 4(R4), R4
+ MOVW R4, LR
+ BR (LR)
+
+/*
+ * trampoline functions because the kernel smashes r1
+ * in the uregs given to notejmp
+ */
+TEXT __noterestore(SB), 1, $-4
+ MOVW R4, R3
+ MOVW R5, LR
+ BR (LR)
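
setjmp here records just the stack pointer and return address (sigsetjmp prepends the save flag and signal mask), and longjmp rebuilds that state so the original setjmp call appears to return a second time with a nonzero value. Standard usage, for reference:

	#include <setjmp.h>

	static jmp_buf env;

	static void
	fail(void)
	{
		longjmp(env, 2);	/* unwind to the setjmp below */
	}

	int
	run(void)
	{
		int r;

		r = setjmp(env);	/* 0 on the first return */
		if(r != 0)
			return r;	/* resumed here with r == 2 */
		fail();
		return 0;		/* not reached */
	}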
diff --git a/sys/src/ape/lib/ap/power/strcmp.s b/sys/src/ape/lib/ap/power/strcmp.s
new file mode 100755
index 000000000..0aef5b29c
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/strcmp.s
@@ -0,0 +1,21 @@
+TEXT strcmp(SB), $0
+
+ MOVW s2+4(FP), R4
+
+ SUB $1, R3
+ SUB $1, R4
+l1:
+ MOVBZU 1(R3), R5
+ MOVBZU 1(R4), R6
+ CMP R5, R6
+ BNE ne
+ CMP R5, $0
+ BNE l1
+ MOVW $0, R3
+ RETURN
+ne:
+ MOVW $1, R3
+ BGT ret
+ MOVW $-1, R3
+ret:
+ RETURN
diff --git a/sys/src/ape/lib/ap/power/tas.s b/sys/src/ape/lib/ap/power/tas.s
new file mode 100755
index 000000000..09fb0c492
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/tas.s
@@ -0,0 +1,16 @@
+TEXT tas(SB), $0
+ SYNC
+ MOVW R3, R4
+ MOVW $0xdeaddead,R5
+tas1:
+ DCBF (R4) /* fix for 603x bug */
+ LWAR (R4), R3
+ CMP R3, $0
+ BNE tas0
+ DCBT (R4) /* fix 405 errata cpu_210 */
+ STWCCC R5, (R4)
+ BNE tas1
+tas0:
+ SYNC
+ ISYNC
+ RETURN
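
LWAR/STWCCC are PowerPC's load-reserved/store-conditional pair: the store succeeds only if nothing has touched the word since the load, so a successful store means the test-and-set was atomic. As C-like pseudocode, with hypothetical load_reserved()/store_conditional() standing in for the two instructions:

	int
	tas_sketch(int *p)
	{
		int old;

		do {
			old = load_reserved(p);		/* LWAR */
			if(old != 0)
				return old;		/* already held */
		} while(!store_conditional(p, 0xdeaddead));	/* STWCCC; retry if the reservation was lost */
		return 0;				/* 0 means we took the lock */
	}

The 0xdeaddead marker is just a recognizable nonzero value; lock.c only ever tests the result against zero.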
diff --git a/sys/src/ape/lib/ap/power/vlop.s b/sys/src/ape/lib/ap/power/vlop.s
new file mode 100755
index 000000000..9085da247
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/vlop.s
@@ -0,0 +1,132 @@
+#define BDNZ BC 16,0,
+
+/*
+ * 64/64 division adapted from powerpc compiler writer's handbook
+ *
+ * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
+ * quo dvd dvs
+ *
+ * Remainder is left in R7:R8
+ *
+ * Code comment notation:
+ * msw = most-significant (high-order) word, i.e. bits 0..31
+ * lsw = least-significant (low-order) word, i.e. bits 32..63
+ * LZ = Leading Zeroes
+ * SD = Significant Digits
+ *
+ * R3:R4 = dvd (input dividend); quo (output quotient)
+ * R5:R6 = dvs (input divisor)
+ *
+ * R7:R8 = tmp; rem (output remainder)
+ */
+
+TEXT _divu64(SB), $0
+ MOVW a+0(FP), R3
+ MOVW a+4(FP), R4
+ MOVW b+8(FP), R5
+ MOVW b+12(FP), R6
+
+ /* count the number of leading 0s in the dividend */
+ CMP R3, $0 /* dvd.msw == 0? */
+ CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
+ CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
+ BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
+ ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */
+
+lab1:
+ /* count the number of leading 0s in the divisor */
+ CMP R5, $0 /* dvs.msw == 0? */
+ CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
+ CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
+ BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
+ ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */
+
+lab2:
+ /* determine shift amounts to minimize the number of iterations */
+ CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
+ SUBC R11, $64, R10 /* R10 = dvd.SD */
+ BGT lab9 /* if(dvs > dvd) quotient = 0 */
+ ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
+ SUBC R9, $64, R9 /* R9 = dvs.SD */
+ ADD R9, R11 /* (dvd.LZ + dvs.SD) = left shift of dvd for */
+ /* initial dvd */
+ SUB R9, R10, R9 /* (dvd.SD - dvs.SD) = right shift of dvd for */
+ /* initial tmp */
+ MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD */
+
+ /* R7:R8 = R3:R4 >> R9 */
+ CMP R9, $32
+ ADD $-32, R9, R7
+ BLT lab3 /* if(R9 < 32) jump to lab3 */
+ SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
+ MOVW $0, R7 /* tmp.msw = 0 */
+ BR lab4
+lab3:
+ SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
+ SUBC R9, $32, R7
+ SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */
+ OR R7, R8 /* tmp.lsw = R8 | R7 */
+ SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
+
+lab4:
+ /* R3:R4 = R3:R4 << R11 */
+ CMP R11,$32
+ ADDC $-32, R11, R9
+ BLT lab5 /* (R11 < 32)? */
+ SLW R9, R4, R3 /* dvd.msw = dvd.lsw << R9 */
+ MOVW $0, R4 /* dvd.lsw = 0 */
+ BR lab6
+
+lab5:
+ SLW R11, R3 /* R3 = dvd.msw << R11 */
+ SUBC R11, $32, R9
+ SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */
+ OR R9, R3 /* dvd.msw = R3 | R9 */
+ SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
+
+lab6:
+ /* restoring division shift and subtract loop */
+ MOVW $-1, R10
+ ADDC $0, R7 /* clear carry bit before loop starts */
+lab7:
+ /* tmp:dvd is considered one large register */
+ /* each portion is shifted left 1 bit by adding it to itself */
+ /* adde sums the carry from the previous and creates a new carry */
+ ADDE R4,R4 /* shift dvd.lsw left 1 bit */
+ ADDE R3,R3 /* shift dvd.msw to left 1 bit */
+ ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
+ ADDE R7,R7 /* shift tmp.msw to left 1 bit */
+ SUBC R6, R8, R11 /* tmp.lsw - dvs.lsw */
+ SUBECC R5, R7, R9 /* tmp.msw - dvs.msw */
+ BLT lab8 /* if(result < 0) clear carry bit */
+ MOVW R11, R8 /* move lsw */
+ MOVW R9, R7 /* move msw */
+ ADDC $1, R10, R11 /* set carry bit */
+lab8:
+ BDNZ lab7
+
+ ADDE R4,R4 /* quo.lsw (lsb = CA) */
+ ADDE R3,R3 /* quo.msw (lsb from lsw) */
+
+lab10:
+ MOVW qp+16(FP), R9
+ MOVW rp+20(FP), R10
+ CMP R9, $0
+ BEQ lab11
+ MOVW R3, 0(R9)
+ MOVW R4, 4(R9)
+lab11:
+ CMP R10, $0
+ BEQ lab12
+ MOVW R7, 0(R10)
+ MOVW R8, 4(R10)
+lab12:
+ RETURN
+
+lab9:
+ /* Quotient is 0 (dvs > dvd) */
+ MOVW R4, R8 /* rmd.lsw = dvd.lsw */
+ MOVW R3, R7 /* rmd.msw = dvd.msw */
+ MOVW $0, R4 /* dvd.lsw = 0 */
+ MOVW $0, R3 /* dvd.msw = 0 */
+ BR lab10
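
Stripped of the normalization that trims the iteration count, the loop at lab7 is plain restoring division: shift the remainder/dividend pair left one bit, try subtracting the divisor, and keep the difference (setting a quotient bit) only when it does not go negative. A plain C rendering for reference (a sketch that runs all 64 iterations rather than dvd.SD - dvs.SD):

	typedef unsigned long long uvlong;

	uvlong
	divu64_sketch(uvlong dvd, uvlong dvs, uvlong *rem)
	{
		uvlong quo, tmp;
		int i;

		quo = 0;
		tmp = 0;	/* running remainder */
		for(i = 0; i < 64; i++){
			tmp = (tmp << 1) | (dvd >> 63);	/* shift tmp:dvd left as one 128-bit unit */
			dvd <<= 1;
			quo <<= 1;
			if(tmp >= dvs){		/* subtraction fits: keep it, set quotient bit */
				tmp -= dvs;
				quo |= 1;
			}
		}
		if(rem)
			*rem = tmp;
		return quo;
	}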
diff --git a/sys/src/ape/lib/ap/power/vlrt.c b/sys/src/ape/lib/ap/power/vlrt.c
new file mode 100755
index 000000000..681a3b49b
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/vlrt.c
@@ -0,0 +1,254 @@
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef signed char schar;
+
+#define SIGN(n) (1UL<<(n-1))
+
+typedef struct Vlong Vlong;
+struct Vlong
+{
+ ulong hi;
+ ulong lo;
+};
+
+void abort(void);
+void _divu64(Vlong, Vlong, Vlong*, Vlong*);
+
+void
+_d2v(Vlong *y, double d)
+{
+ union { double d; Vlong; } x;
+ ulong xhi, xlo, ylo, yhi;
+ int sh;
+
+ x.d = d;
+
+ xhi = (x.hi & 0xfffff) | 0x100000;
+ xlo = x.lo;
+ sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+ ylo = 0;
+ yhi = 0;
+ if(sh >= 0) {
+ /* v = (hi||lo) >> sh */
+ if(sh < 32) {
+ if(sh == 0) {
+ ylo = xlo;
+ yhi = xhi;
+ } else {
+ ylo = (xlo >> sh) | (xhi << (32-sh));
+ yhi = xhi >> sh;
+ }
+ } else {
+ if(sh == 32) {
+ ylo = xhi;
+ } else
+ if(sh < 64) {
+ ylo = xhi >> (sh-32);
+ }
+ }
+ } else {
+ /* v = (hi||lo) << -sh */
+ sh = -sh;
+ if(sh <= 10) {
+ ylo = xlo << sh;
+ yhi = (xhi << sh) | (xlo >> (32-sh));
+ } else {
+ /* overflow */
+ yhi = d; /* causes something awful */
+ }
+ }
+ if(x.hi & SIGN(32)) {
+ if(ylo != 0) {
+ ylo = -ylo;
+ yhi = ~yhi;
+ } else
+ yhi = -yhi;
+ }
+
+ y->hi = yhi;
+ y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+
+ _d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+ if(x.hi & SIGN(32)) {
+ if(x.lo) {
+ x.lo = -x.lo;
+ x.hi = ~x.hi;
+ } else
+ x.hi = -x.hi;
+ return -((long)x.hi*4294967296. + x.lo);
+ }
+ return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+ return _v2d(x);
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+ if(n.hi == 0 && d.hi == 0) {
+ q->hi = 0;
+ q->lo = n.lo / d.lo;
+ return;
+ }
+ _divu64(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+ if(n.hi == 0 && d.hi == 0) {
+ r->hi = 0;
+ r->lo = n.lo % d.lo;
+ return;
+ }
+ _divu64(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+ if(v->lo == 0) {
+ v->hi = -v->hi;
+ return;
+ }
+ v->lo = -v->lo;
+ v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ q->lo = (long)n.lo / (long)d.lo;
+ q->hi = ((long)q->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ _divu64(n, d, q, 0);
+ if(nneg != dneg)
+ vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ r->lo = (long)n.lo % (long)d.lo;
+ r->hi = ((long)r->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ _divu64(n, d, 0, r);
+ if(nneg)
+ vneg(r);
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+ Vlong t, u;
+
+ u = *ret;
+ switch(type) {
+ default:
+ abort();
+ break;
+
+ case 1: /* schar */
+ t.lo = *(schar*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(schar*)lv = u.lo;
+ break;
+
+ case 2: /* uchar */
+ t.lo = *(uchar*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uchar*)lv = u.lo;
+ break;
+
+ case 3: /* short */
+ t.lo = *(short*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(short*)lv = u.lo;
+ break;
+
+ case 4: /* ushort */
+ t.lo = *(ushort*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ushort*)lv = u.lo;
+ break;
+
+ case 9: /* int */
+ t.lo = *(int*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(int*)lv = u.lo;
+ break;
+
+ case 10: /* uint */
+ t.lo = *(uint*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uint*)lv = u.lo;
+ break;
+
+ case 5: /* long */
+ t.lo = *(long*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(long*)lv = u.lo;
+ break;
+
+ case 6: /* ulong */
+ t.lo = *(ulong*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ulong*)lv = u.lo;
+ break;
+
+ case 7: /* vlong */
+ case 8: /* uvlong */
+ fn(&u, *(Vlong*)lv, rv);
+ *(Vlong*)lv = u;
+ break;
+ }
+ *ret = u;
+}
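
_vasop is the runtime hook for op-assignments whose left side is narrower than a vlong: it widens the lvalue into a Vlong (sign- or zero-extending according to the type code), applies the operation, stores the narrowed result back, and leaves the full-width value in *ret for the enclosing expression. A hedged sketch of how the compiler presumably emits the call for "s /= v" with a short s (names and values illustrative):

	short s = 1000;
	Vlong v = { 0, 3 };	/* hi, lo: the value 3 */
	Vlong ret;

	_vasop(&ret, &s, _divv, 3, v);	/* type 3 = short; s becomes 333, ret holds 333 */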