| author | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
| --- | --- | --- |
| committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300 |
| commit | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch) | |
| tree | d8d51eac403f07814b9e936eed0c9a79195e2450 | /sys/src/ape/lib/ap/power |
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/power')
| Mode | File | Lines added |
| --- | --- | --- |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/cycles.s | 17 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/getfcr.s | 28 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/lock.c | 45 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/main9.s | 14 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/main9p.s | 46 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/memcmp.s | 110 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/memmove.s | 170 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/memset.s | 73 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/mkfile | 23 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/notetramp.c | 72 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/setjmp.s | 37 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/strcmp.s | 21 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/tas.s | 16 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/vlop.s | 132 |
| -rwxr-xr-x | sys/src/ape/lib/ap/power/vlrt.c | 254 |
15 files changed, 1058 insertions, 0 deletions
diff --git a/sys/src/ape/lib/ap/power/cycles.s b/sys/src/ape/lib/ap/power/cycles.s new file mode 100755 index 000000000..b4ad52367 --- /dev/null +++ b/sys/src/ape/lib/ap/power/cycles.s @@ -0,0 +1,17 @@ +#define TBRL 268 +#define TBRU 269 /* Time base Upper/Lower (Reading) */ + +/* + * time stamp counter; _cycles since power up + * Runs at fasthz/4 cycles per second (m->clkin>>3) + */ +TEXT _cycles(SB),1,$0 +loop: + MOVW SPR(TBRU),R7 + MOVW SPR(TBRL),R8 + MOVW SPR(TBRU),R5 + CMP R5,R7 + BNE loop + MOVW R7,0(R3) + MOVW R8,4(R3) + RETURN diff --git a/sys/src/ape/lib/ap/power/getfcr.s b/sys/src/ape/lib/ap/power/getfcr.s new file mode 100755 index 000000000..b61d52e68 --- /dev/null +++ b/sys/src/ape/lib/ap/power/getfcr.s @@ -0,0 +1,28 @@ +TEXT getfcr(SB), $8 + MOVFL FPSCR, F3 + FMOVD F3, f-8(SP) + MOVW -4(SP), R3 + RETURN + +TEXT getfsr(SB), $8 + MOVFL FPSCR, F3 + FMOVD F3, f-8(SP) + MOVW -4(SP), R3 + RETURN + +TEXT setfcr(SB), $8 + SYNC + MOVW R3, -4(SP) + FMOVD -8(SP), F3 + MOVFL F3, FPSCR + ISYNC + RETURN + +TEXT setfsr(SB), $8 + SYNC + MOVW R3, -4(SP) + FMOVD -8(SP), F3 + MOVFL F3, FPSCR + ISYNC + RETURN + diff --git a/sys/src/ape/lib/ap/power/lock.c b/sys/src/ape/lib/ap/power/lock.c new file mode 100755 index 000000000..0d17c34d5 --- /dev/null +++ b/sys/src/ape/lib/ap/power/lock.c @@ -0,0 +1,45 @@ +#include "../plan9/lib.h" +#include "../plan9/sys9.h" +#define _LOCK_EXTENSION +#include <lock.h> + +int tas(int*); + +void +lock(Lock *lk) +{ + int i; + + /* once fast */ + if(!tas(&lk->val)) + return; + /* a thousand times pretty fast */ + for(i=0; i<1000; i++){ + if(!tas(&lk->val)) + return; + _SLEEP(0); + } + /* now nice and slow */ + for(i=0; i<1000; i++){ + if(!tas(&lk->val)) + return; + _SLEEP(100); + } + /* take your time */ + while(tas(&lk->val)) + _SLEEP(1000); +} + +int +canlock(Lock *lk) +{ + if(tas(&lk->val)) + return 0; + return 1; +} + +void +unlock(Lock *lk) +{ + lk->val = 0; +} diff --git a/sys/src/ape/lib/ap/power/main9.s b/sys/src/ape/lib/ap/power/main9.s new file mode 100755 index 000000000..ee493f5a8 --- /dev/null +++ b/sys/src/ape/lib/ap/power/main9.s @@ -0,0 +1,14 @@ +TEXT _main(SB), 1, $16 + + MOVW $setSB(SB), R2 + + BL _envsetup(SB) + MOVW inargc-4(FP), R3 + MOVW $inargv+0(FP), R4 + MOVW R3, 4(R1) + MOVW R4, 8(R1) + BL main(SB) +loop: + MOVW R3, 4(R1) + BL exit(SB) + BR loop diff --git a/sys/src/ape/lib/ap/power/main9p.s b/sys/src/ape/lib/ap/power/main9p.s new file mode 100755 index 000000000..865616980 --- /dev/null +++ b/sys/src/ape/lib/ap/power/main9p.s @@ -0,0 +1,46 @@ +#define NPRIVATES 16 + +GLOBL _tos(SB), $4 +GLOBL _privates(SB), $4 +GLOBL _nprivates(SB), $4 + +TEXT _mainp(SB), 1, $(3*4+NPRIVATES*4) + + MOVW $setSB(SB), R2 + + /* _tos = arg */ + MOVW R3, _tos(SB) + MOVW $8(SP), R1 + MOVW R1, _privates(SB) + MOVW $NPRIVATES, R1 + MOVW R1, _nprivates(SB) + + /* _profmain(); */ + BL _envsetup(SB) + + /* _tos->prof.pp = _tos->prof.next; */ + MOVW _tos+0(SB),R1 + MOVW 4(R1),R2 + MOVW R2,(R1) + + /* main(argc, argv, environ); */ + MOVW inargc-4(FP), R3 + MOVW $inargv+0(FP), R4 + MOVW environ(SB), R5 + MOVW R3, 4(R1) + MOVW R4, 8(R1) + MOVW R5, 12(R1) + BL main(SB) +loop: + MOVW R3, 4(R1) + BL exit(SB) + MOVW $_profin(SB), R4 /* force loading of profile */ + BR loop + +TEXT _savearg(SB), 1, $0 + RETURN + +TEXT _callpc(SB), 1, $0 + MOVW argp+0(FP), R3 + MOVW 4(R3), R3 + RETURN diff --git a/sys/src/ape/lib/ap/power/memcmp.s b/sys/src/ape/lib/ap/power/memcmp.s new file mode 100755 index 000000000..f524fa9d3 --- /dev/null +++ b/sys/src/ape/lib/ap/power/memcmp.s @@ -0,0 
+1,110 @@ + TEXT memcmp(SB), $0 +#define BDNZ BC 16,0, + MOVW R3, s1+0(FP) /* R3 is pointer1 */ + +/* + * performance: + * 67mb/sec aligned; 16mb/sec unaligned + */ + + MOVW n+8(FP), R4 /* R4 is count */ + MOVW s2+4(FP), R5 /* R5 is pointer2 */ + +/* + * let LSW do the work for 4 characters or less; aligned and unaligned + */ + CMP R4, $0 + BLE eq + CMP R4, $4 + BLE out + + XOR R3, R5, R9 + ANDCC $3, R9 + BNE l4 /* pointers misaligned; use LSW loop */ + +/* + * do enough bytes to align pointers + */ + ANDCC $3,R3, R9 + BEQ l2 + SUBC R9, $4, R9 + MOVW R9, XER + LSW (R3), R10 + ADD R9, R3 + LSW (R5), R14 + ADD R9, R5 + SUB R9, R4 + CMPU R10, R14 + BNE ne + +/* + * compare 16 at a time + */ +l2: + SRAWCC $4, R4, R9 + BLE l4 + MOVW R9, CTR + SUB $4, R3 + SUB $4, R5 +l3: + MOVWU 4(R3), R10 + MOVWU 4(R5), R12 + MOVWU 4(R3), R11 + MOVWU 4(R5), R13 + CMPU R10, R12 + BNE ne + MOVWU 4(R3), R10 + MOVWU 4(R5), R12 + CMPU R11, R13 + BNE ne + MOVWU 4(R3), R11 + MOVWU 4(R5), R13 + CMPU R10, R12 + BNE ne + CMPU R11, R13 + BNE ne + BDNZ l3 + ADD $4, R3 + ADD $4, R5 + RLWNMCC $0, R4, $15, R4 /* residue */ + BEQ eq + +/* + * do remaining words with LSW; also does unaligned case + */ +l4: + SRAWCC $2, R4, R9 + BLE out + MOVW R9, CTR +l5: + LSW (R3), $4, R10 + ADD $4, R3 + LSW (R5), $4, R11 + ADD $4, R5 + CMPU R10, R11 + BNE ne + BDNZ l5 + RLWNMCC $0, R4, $3, R4 /* residue */ + BEQ eq + +/* + * do remaining bytes with final LSW + */ +out: + MOVW R4, XER + LSW (R3), R10 + LSW (R5), R11 + CMPU R10, R11 + BNE ne + +eq: + MOVW $0, R3 + RETURN + +ne: + MOVW $1, R3 + BGE ret + MOVW $-1,R3 +ret: + RETURN + END diff --git a/sys/src/ape/lib/ap/power/memmove.s b/sys/src/ape/lib/ap/power/memmove.s new file mode 100755 index 000000000..dd6167d7d --- /dev/null +++ b/sys/src/ape/lib/ap/power/memmove.s @@ -0,0 +1,170 @@ +#define BDNZ BC 16,0, + TEXT memmove(SB), $0 + BR move + + TEXT memcpy(SB), $0 +move: + +/* + * performance: + * (tba) + */ + + MOVW R3, s1+0(FP) + MOVW n+8(FP), R9 /* R9 is count */ + MOVW R3, R10 /* R10 is to-pointer */ + CMP R9, $0 + BEQ ret + BLT trap + MOVW s2+4(FP), R11 /* R11 is from-pointer */ + +/* + * if no more than 16 bytes, just use one lsw/stsw + */ + CMP R9, $16 + BLE fout + + ADD R9,R11, R13 /* R13 is end from-pointer */ + ADD R9,R10, R12 /* R12 is end to-pointer */ + +/* + * easiest test is copy backwards if + * destination string has higher mem address + */ + CMPU R10, R11 + BGT back + +/* + * test if both pointers + * are similarly word aligned + */ + XOR R10,R11, R7 + ANDCC $3,R7 + BNE fbad + +/* + * move a few bytes to align pointers + */ + ANDCC $3,R10,R7 + BEQ f2 + SUBC R7, $4, R7 + SUB R7, R9 + MOVW R7, XER + LSW (R11), R16 + ADD R7, R11 + STSW R16, (R10) + ADD R7, R10 + +/* + * turn R14 into doubleword count + * copy 16 bytes at a time while there's room. + */ +f2: + SRAWCC $4, R9, R14 + BLE fout + MOVW R14, CTR + SUB $4, R11 + SUB $4, R10 +f3: + MOVWU 4(R11), R16 + MOVWU R16, 4(R10) + MOVWU 4(R11), R17 + MOVWU R17, 4(R10) + MOVWU 4(R11), R16 + MOVWU R16, 4(R10) + MOVWU 4(R11), R17 + MOVWU R17, 4(R10) + BDNZ f3 + RLWNMCC $0, R9, $15, R9 /* residue */ + BEQ ret + ADD $4, R11 + ADD $4, R10 + +/* + * move up to 16 bytes through R16 .. 
R19; aligned and unaligned + */ +fout: + MOVW R9, XER + LSW (R11), R16 + STSW R16, (R10) + BR ret + +/* + * loop for unaligned copy, then copy up to 15 remaining bytes + */ +fbad: + SRAWCC $4, R9, R14 + BLE f6 + MOVW R14, CTR +f5: + LSW (R11), $16, R16 + ADD $16, R11 + STSW R16, $16, (R10) + ADD $16, R10 + BDNZ f5 + RLWNMCC $0, R9, $15, R9 /* residue */ + BEQ ret +f6: + MOVW R9, XER + LSW (R11), R16 + STSW R16, (R10) + BR ret + +/* + * whole thing repeated for backwards + */ +back: + CMP R9, $4 + BLT bout + + XOR R12,R13, R7 + ANDCC $3,R7 + BNE bout +b1: + ANDCC $3,R13, R7 + BEQ b2 + MOVBZU -1(R13), R16 + MOVBZU R16, -1(R12) + SUB $1, R9 + BR b1 +b2: + SRAWCC $4, R9, R14 + BLE b4 + MOVW R14, CTR +b3: + MOVWU -4(R13), R16 + MOVWU R16, -4(R12) + MOVWU -4(R13), R17 + MOVWU R17, -4(R12) + MOVWU -4(R13), R16 + MOVWU R16, -4(R12) + MOVWU -4(R13), R17 + MOVWU R17, -4(R12) + BDNZ b3 + RLWNMCC $0, R9, $15, R9 /* residue */ + BEQ ret +b4: + SRAWCC $2, R9, R14 + BLE bout + MOVW R14, CTR +b5: + MOVWU -4(R13), R16 + MOVWU R16, -4(R12) + BDNZ b5 + RLWNMCC $0, R9, $3, R9 /* residue */ + BEQ ret + +bout: + CMPU R13, R11 + BLE ret + MOVBZU -1(R13), R16 + MOVBZU R16, -1(R12) + BR bout + +trap: + MOVW $0, R0 + MOVW 0(R0), R0 + +ret: + MOVW s1+0(FP), R3 + RETURN diff --git a/sys/src/ape/lib/ap/power/memset.s b/sys/src/ape/lib/ap/power/memset.s new file mode 100755 index 000000000..fa6e8d920 --- /dev/null +++ b/sys/src/ape/lib/ap/power/memset.s @@ -0,0 +1,73 @@ + TEXT memset(SB),$0 +#define BDNZ BC 16,0, + MOVW R3, p+0(FP) /* R3 is pointer */ + +/* + * performance: + * about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache + * drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks + */ + + MOVW n+8(FP), R4 /* R4 is count */ + CMP R4, $0 + BLE ret + MOVW c+4(FP), R5 /* R5 is char */ + +/* + * create 16 copies of c in R5 .. R8 + */ + RLWNM $0, R5, $0xff, R5 + RLWMI $8, R5, $0xff00, R5 + RLWMI $16, R5, $0xffff0000, R5 + MOVW R5, R6 + MOVW R5, R7 + MOVW R5, R8 + +/* + * let STSW do the work for 16 characters or less; aligned and unaligned + */ + CMP R4, $16 + BLE out + +/* + * store enough bytes to align pointer + */ + ANDCC $7,R3, R9 + BEQ l2 + SUBC R9, $8, R9 + MOVW R9, XER + STSW R5, (R3) + ADD R9, R3 + SUB R9, R4 + +/* + * store 16 at a time while there's room + * STSW was used here originally, but it's `completion serialised' + */ +l2: + SRAWCC $4, R4, R9 + BLE out + MOVW R9, CTR +l3: + MOVW R5, 0(R3) + ADD $8, R3, R10 + MOVW R6, 4(R3) + MOVW R7, 0(R10) + ADD $8, R10, R3 + MOVW R8, 4(R10) + BDNZ l3 + RLWNMCC $0, R4, $15, R4 /* residue */ + BEQ ret + +/* + * store up to 16 bytes from R5 .. 
R8; aligned and unaligned + */ + +out: + MOVW R4, XER + STSW R5, (R3) + +ret: + MOVW 0(FP), R3 + RETURN + END diff --git a/sys/src/ape/lib/ap/power/mkfile b/sys/src/ape/lib/ap/power/mkfile new file mode 100755 index 000000000..cc546cd84 --- /dev/null +++ b/sys/src/ape/lib/ap/power/mkfile @@ -0,0 +1,23 @@ +APE=/sys/src/ape +<$APE/config +LIB=/$objtype/lib/ape/libap.a +OFILES=\ + cycles.$O\ + getfcr.$O\ + lock.$O\ + main9.$O\ + main9p.$O\ + memcmp.$O\ + memmove.$O\ + memset.$O\ + notetramp.$O\ + setjmp.$O\ + strcmp.$O\ + tas.$O\ + vlop.$O\ + vlrt.$O\ + +</sys/src/cmd/mksyslib + +CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE + diff --git a/sys/src/ape/lib/ap/power/notetramp.c b/sys/src/ape/lib/ap/power/notetramp.c new file mode 100755 index 000000000..6477e1b14 --- /dev/null +++ b/sys/src/ape/lib/ap/power/notetramp.c @@ -0,0 +1,72 @@ +#include "../plan9/lib.h" +#include "../plan9/sys9.h" +#include <signal.h> +#include <setjmp.h> + +/* A stack to hold pcs when signals nest */ +#define MAXSIGSTACK 20 +typedef struct Pcstack Pcstack; +static struct Pcstack { + int sig; + void (*hdlr)(int, char*, Ureg*); + unsigned long restorepc; + Ureg *u; +} pcstack[MAXSIGSTACK]; +static int nstack = 0; + +static void notecont(Ureg*, char*); + +void +_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u) +{ + Pcstack *p; + + if(nstack >= MAXSIGSTACK) + _NOTED(1); /* nesting too deep; just do system default */ + p = &pcstack[nstack]; + p->restorepc = u->pc; + p->sig = sig; + p->hdlr = hdlr; + p->u = u; + nstack++; + u->pc = (unsigned long) notecont; + _NOTED(2); /* NSAVE: clear note but hold state */ +} + +static void +notecont(Ureg *u, char *s) +{ + Pcstack *p; + void(*f)(int, char*, Ureg*); + + p = &pcstack[nstack-1]; + f = p->hdlr; + u->pc = p->restorepc; + nstack--; + (*f)(p->sig, s, u); + _NOTED(3); /* NRSTR */ +} + +#define JMPBUFPC 1 +#define JMPBUFSP 0 + +extern sigset_t _psigblocked; + +void +siglongjmp(sigjmp_buf j, int ret) +{ + struct Ureg *u; + + if(j[0]) + _psigblocked = j[1]; + if(nstack == 0 || pcstack[nstack-1].u->sp > j[2+JMPBUFSP]) + longjmp(j+2, ret); + u = pcstack[nstack-1].u; + nstack--; + u->r3 = ret; + if(ret == 0) + u->r3 = 1; + u->pc = j[2+JMPBUFPC]; + u->sp = j[2+JMPBUFSP]; + _NOTED(3); /* NRSTR */ +} diff --git a/sys/src/ape/lib/ap/power/setjmp.s b/sys/src/ape/lib/ap/power/setjmp.s new file mode 100755 index 000000000..0023afcf2 --- /dev/null +++ b/sys/src/ape/lib/ap/power/setjmp.s @@ -0,0 +1,37 @@ +TEXT setjmp(SB), 1, $-4 + MOVW LR, R4 + MOVW R1, (R3) + MOVW R4, 4(R3) + MOVW $0, R3 + RETURN + +TEXT sigsetjmp(SB), 1, $-4 + MOVW savemask+4(FP), R4 + MOVW R4, 0(R3) + MOVW $_psigblocked(SB), R4 + MOVW R4, 4(R3) + MOVW LR, R4 + MOVW R1, 8(R3) + MOVW R4, 12(R3) + MOVW $0, R3 + RETURN + +TEXT longjmp(SB), 1, $-4 + MOVW R3, R4 + MOVW r+4(FP), R3 + CMP R3, $0 + BNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVW $1, R3 /* bless their pointed heads */ +ok: MOVW (R4), R1 + MOVW 4(R4), R4 + MOVW R4, LR + BR (LR) + +/* + * trampoline functions because the kernel smashes r1 + * in the uregs given to notejmp + */ +TEXT __noterestore(SB), 1, $-4 + MOVW R4, R3 + MOVW R5, LR + BR (LR) diff --git a/sys/src/ape/lib/ap/power/strcmp.s b/sys/src/ape/lib/ap/power/strcmp.s new file mode 100755 index 000000000..0aef5b29c --- /dev/null +++ b/sys/src/ape/lib/ap/power/strcmp.s @@ -0,0 +1,21 @@ +TEXT strcmp(SB), $0 + + MOVW s2+4(FP), R4 + + SUB $1, R3 + SUB $1, R4 +l1: + MOVBZU 1(R3), R5 + MOVBZU 1(R4), R6 + CMP R5, R6 + BNE ne + CMP R5, $0 + BNE l1 + MOVW $0, R3 + RETURN +ne: + MOVW $1, R3 + BGT ret + MOVW 
$-1, R3 +ret: + RETURN diff --git a/sys/src/ape/lib/ap/power/tas.s b/sys/src/ape/lib/ap/power/tas.s new file mode 100755 index 000000000..09fb0c492 --- /dev/null +++ b/sys/src/ape/lib/ap/power/tas.s @@ -0,0 +1,16 @@ +TEXT tas(SB), $0 + SYNC + MOVW R3, R4 + MOVW $0xdeaddead,R5 +tas1: + DCBF (R4) /* fix for 603x bug */ + LWAR (R4), R3 + CMP R3, $0 + BNE tas0 + DCBT (R4) /* fix 405 errata cpu_210 */ + STWCCC R5, (R4) + BNE tas1 +tas0: + SYNC + ISYNC + RETURN diff --git a/sys/src/ape/lib/ap/power/vlop.s b/sys/src/ape/lib/ap/power/vlop.s new file mode 100755 index 000000000..9085da247 --- /dev/null +++ b/sys/src/ape/lib/ap/power/vlop.s @@ -0,0 +1,132 @@ +#define BDNZ BC 16,0, + +/* + * 64/64 division adapted from powerpc compiler writer's handbook + * + * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b) + * quo dvd dvs + * + * Remainder is left in R7:R8 + * + * Code comment notation: + * msw = most-significant (high-order) word, i.e. bits 0..31 + * lsw = least-significant (low-order) word, i.e. bits 32..63 + * LZ = Leading Zeroes + * SD = Significant Digits + * + * R3:R4 = dvd (input dividend); quo (output quotient) + * R5:R6 = dvs (input divisor) + * + * R7:R8 = tmp; rem (output remainder) + */ + +TEXT _divu64(SB), $0 + MOVW a+0(FP), R3 + MOVW a+4(FP), R4 + MOVW b+8(FP), R5 + MOVW b+12(FP), R6 + + /* count the number of leading 0s in the dividend */ + CMP R3, $0 /* dvd.msw == 0? R3, */ + CNTLZW R3, R11 /* R11 = dvd.msw.LZ */ + CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */ + BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */ + ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */ + +lab1: + /* count the number of leading 0s in the divisor */ + CMP R5, $0 /* dvd.msw == 0? */ + CNTLZW R5, R9 /* R9 = dvs.msw.LZ */ + CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */ + BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */ + ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */ + +lab2: + /* determine shift amounts to minimize the number of iterations */ + CMP R11, R9 /* compare dvd.LZ to dvs.LZ */ + SUBC R11, $64, R10 /* R10 = dvd.SD */ + BGT lab9 /* if(dvs > dvd) quotient = 0 */ + ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */ + SUBC R9, $64, R9 /* R9 = dvs.SD */ + ADD R9, R11 /* (dvd.LZ + dvs.SD) = left shift of dvd for */ + /* initial dvd */ + SUB R9, R10, R9 /* (dvd.SD - dvs.SD) = right shift of dvd for */ + /* initial tmp */ + MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD */ + + /* R7:R8 = R3:R4 >> R9 */ + CMP R9, $32 + ADD $-32, R9, R7 + BLT lab3 /* if(R9 < 32) jump to lab3 */ + SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */ + MOVW $0, R7 /* tmp.msw = 0 */ + BR lab4 +lab3: + SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */ + SUBC R9, $32, R7 + SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */ + OR R7, R8 /* tmp.lsw = R8 | R7 */ + SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */ + +lab4: + /* R3:R4 = R3:R4 << R11 */ + CMP R11,$32 + ADDC $-32, R11, R9 + BLT lab5 /* (R11 < 32)? 
*/ + SLW R9, R4, R3 /* dvd.msw = dvs.lsw << R9 */ + MOVW $0, R4 /* dvd.lsw = 0 */ + BR lab6 + +lab5: + SLW R11, R3 /* R3 = dvd.msw << R11 */ + SUBC R11, $32, R9 + SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */ + OR R9, R3 /* dvd.msw = R3 | R9 */ + SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */ + +lab6: + /* restoring division shift and subtract loop */ + MOVW $-1, R10 + ADDC $0, R7 /* clear carry bit before loop starts */ +lab7: + /* tmp:dvd is considered one large register */ + /* each portion is shifted left 1 bit by adding it to itself */ + /* adde sums the carry from the previous and creates a new carry */ + ADDE R4,R4 /* shift dvd.lsw left 1 bit */ + ADDE R3,R3 /* shift dvd.msw to left 1 bit */ + ADDE R8,R8 /* shift tmp.lsw to left 1 bit */ + ADDE R7,R7 /* shift tmp.msw to left 1 bit */ + SUBC R6, R8, R11 /* tmp.lsw - dvs.lsw */ + SUBECC R5, R7, R9 /* tmp.msw - dvs.msw */ + BLT lab8 /* if(result < 0) clear carry bit */ + MOVW R11, R8 /* move lsw */ + MOVW R9, R7 /* move msw */ + ADDC $1, R10, R11 /* set carry bit */ +lab8: + BDNZ lab7 + + ADDE R4,R4 /* quo.lsw (lsb = CA) */ + ADDE R3,R3 /* quo.msw (lsb from lsw) */ + +lab10: + MOVW qp+16(FP), R9 + MOVW rp+20(FP), R10 + CMP R9, $0 + BEQ lab11 + MOVW R3, 0(R9) + MOVW R4, 4(R9) +lab11: + CMP R10, $0 + BEQ lab12 + MOVW R7, 0(R10) + MOVW R8, 4(R10) +lab12: + RETURN + +lab9: + /* Quotient is 0 (dvs > dvd) */ + MOVW R4, R8 /* rmd.lsw = dvd.lsw */ + MOVW R3, R7 /* rmd.msw = dvd.msw */ + MOVW $0, R4 /* dvd.lsw = 0 */ + MOVW $0, R3 /* dvd.msw = 0 */ + BR lab10 diff --git a/sys/src/ape/lib/ap/power/vlrt.c b/sys/src/ape/lib/ap/power/vlrt.c new file mode 100755 index 000000000..681a3b49b --- /dev/null +++ b/sys/src/ape/lib/ap/power/vlrt.c @@ -0,0 +1,254 @@ +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef signed char schar; + +#define SIGN(n) (1UL<<(n-1)) + +typedef struct Vlong Vlong; +struct Vlong +{ + ulong hi; + ulong lo; +}; + +void abort(void); +void _divu64(Vlong, Vlong, Vlong*, Vlong*); + +void +_d2v(Vlong *y, double d) +{ + union { double d; Vlong; } x; + ulong xhi, xlo, ylo, yhi; + int sh; + + x.d = d; + + xhi = (x.hi & 0xfffff) | 0x100000; + xlo = x.lo; + sh = 1075 - ((x.hi >> 20) & 0x7ff); + + ylo = 0; + yhi = 0; + if(sh >= 0) { + /* v = (hi||lo) >> sh */ + if(sh < 32) { + if(sh == 0) { + ylo = xlo; + yhi = xhi; + } else { + ylo = (xlo >> sh) | (xhi << (32-sh)); + yhi = xhi >> sh; + } + } else { + if(sh == 32) { + ylo = xhi; + } else + if(sh < 64) { + ylo = xhi >> (sh-32); + } + } + } else { + /* v = (hi||lo) << -sh */ + sh = -sh; + if(sh <= 10) { + ylo = xlo << sh; + yhi = (xhi << sh) | (xlo >> (32-sh)); + } else { + /* overflow */ + yhi = d; /* causes something awful */ + } + } + if(x.hi & SIGN(32)) { + if(ylo != 0) { + ylo = -ylo; + yhi = ~yhi; + } else + yhi = -yhi; + } + + y->hi = yhi; + y->lo = ylo; +} + +void +_f2v(Vlong *y, float f) +{ + + _d2v(y, f); +} + +double +_v2d(Vlong x) +{ + if(x.hi & SIGN(32)) { + if(x.lo) { + x.lo = -x.lo; + x.hi = ~x.hi; + } else + x.hi = -x.hi; + return -((long)x.hi*4294967296. + x.lo); + } + return (long)x.hi*4294967296. 
+ x.lo; +} + +float +_v2f(Vlong x) +{ + return _v2d(x); +} + +void +_divvu(Vlong *q, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + q->hi = 0; + q->lo = n.lo / d.lo; + return; + } + _divu64(n, d, q, 0); +} + +void +_modvu(Vlong *r, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + r->hi = 0; + r->lo = n.lo % d.lo; + return; + } + _divu64(n, d, 0, r); +} + +static void +vneg(Vlong *v) +{ + + if(v->lo == 0) { + v->hi = -v->hi; + return; + } + v->lo = -v->lo; + v->hi = ~v->hi; +} + +void +_divv(Vlong *q, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + q->lo = (long)n.lo / (long)d.lo; + q->hi = ((long)q->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + _divu64(n, d, q, 0); + if(nneg != dneg) + vneg(q); +} + +void +_modv(Vlong *r, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + r->lo = (long)n.lo % (long)d.lo; + r->hi = ((long)r->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + _divu64(n, d, 0, r); + if(nneg) + vneg(r); +} + +void +_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv) +{ + Vlong t, u; + + u = *ret; + switch(type) { + default: + abort(); + break; + + case 1: /* schar */ + t.lo = *(schar*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(schar*)lv = u.lo; + break; + + case 2: /* uchar */ + t.lo = *(uchar*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uchar*)lv = u.lo; + break; + + case 3: /* short */ + t.lo = *(short*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(short*)lv = u.lo; + break; + + case 4: /* ushort */ + t.lo = *(ushort*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ushort*)lv = u.lo; + break; + + case 9: /* int */ + t.lo = *(int*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(int*)lv = u.lo; + break; + + case 10: /* uint */ + t.lo = *(uint*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uint*)lv = u.lo; + break; + + case 5: /* long */ + t.lo = *(long*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(long*)lv = u.lo; + break; + + case 6: /* ulong */ + t.lo = *(ulong*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ulong*)lv = u.lo; + break; + + case 7: /* vlong */ + case 8: /* uvlong */ + fn(&u, *(Vlong*)lv, rv); + *(Vlong*)lv = u; + break; + } + *ret = u; +} |
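For readers skimming this import: lock.c above spins on tas() with three levels of backoff — immediate retries, a thousand retries yielding with _SLEEP(0), a thousand slower retries with _SLEEP(100), then one-second sleeps until the lock frees. The sketch below restates that strategy in portable C for clarity only; SketchLock, sketch_tas, sketch_lock and the use of C11 atomics plus POSIX usleep() are illustrative stand-ins, not part of this commit, which relies on the PowerPC tas() assembly and the Plan 9 _SLEEP call instead.

```c
/*
 * Illustrative sketch of the graduated backoff in lock.c, restated with
 * C11 atomics and usleep() in place of the tas() assembly and _SLEEP().
 * All names here are hypothetical; they do not appear in the import.
 */
#include <stdatomic.h>
#include <unistd.h>

typedef struct { atomic_int val; } SketchLock;

/* stand-in for tas(): atomically set the word, return its previous value */
static int
sketch_tas(atomic_int *v)
{
	return atomic_exchange(v, 1);
}

void
sketch_lock(SketchLock *lk)
{
	int i;

	if(!sketch_tas(&lk->val))		/* once fast */
		return;
	for(i = 0; i < 1000; i++){		/* a thousand times pretty fast */
		if(!sketch_tas(&lk->val))
			return;
		usleep(0);
	}
	for(i = 0; i < 1000; i++){		/* now nice and slow */
		if(!sketch_tas(&lk->val))
			return;
		usleep(100 * 1000);		/* _SLEEP(100): 100 ms */
	}
	while(sketch_tas(&lk->val))		/* take your time */
		usleep(1000 * 1000);		/* _SLEEP(1000): 1 s */
}

void
sketch_unlock(SketchLock *lk)
{
	atomic_store(&lk->val, 0);
}
```

In the imported code the atomicity comes from the LWAR/STWCCC reservation sequence in tas.s; as its comments note, the DCBF before the reservation works around a 603x bug and the DCBT before the conditional store works around 405 erratum cpu_210.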