author		Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
committer	Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
commit		e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree		d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/libc/power
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/libc/power')
-rwxr-xr-x	sys/src/libc/power/argv0.s	4
-rwxr-xr-x	sys/src/libc/power/atom.s	65
-rwxr-xr-x	sys/src/libc/power/cycles.s	17
-rwxr-xr-x	sys/src/libc/power/getcallerpc.s	4
-rwxr-xr-x	sys/src/libc/power/getfcr.s	28
-rwxr-xr-x	sys/src/libc/power/main9.s	25
-rwxr-xr-x	sys/src/libc/power/main9p.s	37
-rwxr-xr-x	sys/src/libc/power/memccpy.s	23
-rwxr-xr-x	sys/src/libc/power/memcmp.s	110
-rwxr-xr-x	sys/src/libc/power/memmove.s	170
-rwxr-xr-x	sys/src/libc/power/memset.s	73
-rwxr-xr-x	sys/src/libc/power/mkfile	37
-rwxr-xr-x	sys/src/libc/power/notejmp.c	22
-rwxr-xr-x	sys/src/libc/power/setjmp.s	26
-rwxr-xr-x	sys/src/libc/power/sqrt.c	103
-rwxr-xr-x	sys/src/libc/power/strcmp.s	21
-rwxr-xr-x	sys/src/libc/power/strncmp.s	29
-rwxr-xr-x	sys/src/libc/power/tas.s	14
-rwxr-xr-x	sys/src/libc/power/vlop.s	132
-rwxr-xr-x	sys/src/libc/power/vlrt.c	254
20 files changed, 1194 insertions, 0 deletions
diff --git a/sys/src/libc/power/argv0.s b/sys/src/libc/power/argv0.s
new file mode 100755
index 000000000..8d9f9b29b
--- /dev/null
+++ b/sys/src/libc/power/argv0.s
@@ -0,0 +1,4 @@
+GLOBL	argv0(SB), $4
+GLOBL	_tos(SB), $4
+GLOBL	_privates(SB), $4
+GLOBL	_nprivates(SB), $4
diff --git a/sys/src/libc/power/atom.s b/sys/src/libc/power/atom.s
new file mode 100755
index 000000000..86776e6ed
--- /dev/null
+++ b/sys/src/libc/power/atom.s
@@ -0,0 +1,65 @@
+TEXT ainc(SB),$0	/* long ainc(long *); */
+	MOVW	R3, R4
+xincloop:
+	LWAR	(R4), R3
+	ADD	$1, R3
+	DCBT	(R4)		/* fix 405 errata cpu_210 */
+	STWCCC	R3, (R4)
+	BNE	xincloop
+	RETURN
+
+TEXT adec(SB),$0	/* long adec(long *); */
+	MOVW	R3, R4
+xdecloop:
+	LWAR	(R4), R3
+	ADD	$-1, R3
+	DCBT	(R4)		/* fix 405 errata cpu_210 */
+	STWCCC	R3, (R4)
+	BNE	xdecloop
+	RETURN
+
+TEXT loadlink(SB), $0
+
+	LWAR	(R3), R3
+	RETURN
+
+TEXT storecond(SB), $0
+
+	MOVW	val+4(FP), R4
+	DCBT	(R3)		/* fix 405 errata cpu_210 */
+	STWCCC	R4, (R3)
+	BNE	storecondfail
+	MOVW	$1, R3
+	RETURN
+storecondfail:
+	MOVW	$0, R3
+	RETURN
+
+/*
+ * int cas32(u32int *p, u32int ov, u32int nv);
+ * int cas(uint *p, int ov, int nv);
+ * int casp(void **p, void *ov, void *nv);
+ * int casl(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT cas32+0(SB),0,$0
+TEXT cas+0(SB),0,$0
+TEXT casp+0(SB),0,$0
+TEXT casl+0(SB),0,$0
+	MOVW	ov+4(FP),R4
+	MOVW	nv+8(FP),R8
+	LWAR	(R3),R5
+	CMP	R5,R4
+	BNE	fail
+	DCBT	(R3)		/* fix 405 errata cpu_210 */
+	STWCCC	R8,(R3)
+	BNE	fail1
+	MOVW	$1,R3
+	RETURN
+fail:
+	DCBT	(R3)		/* fix 405 errata cpu_210 */
+	STWCCC	R5,(R3)		/* give up exclusive access */
+fail1:
+	MOVW	R0,R3
+	RETURN
+	END
diff --git a/sys/src/libc/power/cycles.s b/sys/src/libc/power/cycles.s
new file mode 100755
index 000000000..441171136
--- /dev/null
+++ b/sys/src/libc/power/cycles.s
@@ -0,0 +1,17 @@
+#define	TBRL	268
+#define	TBRU	269	/* Time base Upper/Lower (Reading) */
+
+/*
+ * time stamp counter; _cycles since power up
+ * Runs at fasthz/4 cycles per second (m->clkin>>3)
+ */
+TEXT cycles(SB),1,$0
+loop:
+	MOVW	SPR(TBRU),R7
+	MOVW	SPR(TBRL),R8
+	MOVW	SPR(TBRU),R5
+	CMP	R5,R7
+	BNE	loop
+	MOVW	R7,0(R3)
+	MOVW	R8,4(R3)
+	RETURN
diff --git a/sys/src/libc/power/getcallerpc.s b/sys/src/libc/power/getcallerpc.s
new file mode 100755
index 000000000..62c3ee233
--- /dev/null
+++ b/sys/src/libc/power/getcallerpc.s
@@ -0,0 +1,4 @@
+TEXT getcallerpc(SB),1,$-4
+	MOVW	0(R1), R3
+	RETURN
+
diff --git a/sys/src/libc/power/getfcr.s b/sys/src/libc/power/getfcr.s
new file mode 100755
index 000000000..b61d52e68
--- /dev/null
+++ b/sys/src/libc/power/getfcr.s
@@ -0,0 +1,28 @@
+TEXT	getfcr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	getfsr(SB), $8
+	MOVFL	FPSCR, F3
+	FMOVD	F3, f-8(SP)
+	MOVW	-4(SP), R3
+	RETURN
+
+TEXT	setfcr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+
+TEXT	setfsr(SB), $8
+	SYNC
+	MOVW	R3, -4(SP)
+	FMOVD	-8(SP), F3
+	MOVFL	F3, FPSCR
+	ISYNC
+	RETURN
+
diff --git a/sys/src/libc/power/main9.s b/sys/src/libc/power/main9.s
new file mode 100755
index 000000000..46085adf5
--- /dev/null
+++ b/sys/src/libc/power/main9.s
@@ -0,0 +1,25 @@
+#define NPRIVATES	16
+
+TEXT	_main(SB), 1, $(16 + NPRIVATES*4)
+
+	MOVW	$setSB(SB), R2
+	MOVW	R3, _tos(SB)
+
+	MOVW	$p-64(SP), R4
+	MOVW	R4, _privates+0(SB)
+	MOVW	$16, R4
+	MOVW	R4, _nprivates+0(SB)
+
+	MOVW	inargc-4(FP), R3
+	MOVW	$inargv+0(FP), R4
+	MOVW	R3, 4(R1)
+	MOVW	R4, 8(R1)
+	BL	main(SB)
+loop:
+	MOVW	$_exitstr<>(SB), R3
+	MOVW	R3, 4(R1)
+	BL	exits(SB)
+	BR	loop
+
+DATA	_exitstr<>+0(SB)/4, $"main"
+GLOBL	_exitstr<>+0(SB), $5
diff --git a/sys/src/libc/power/main9p.s b/sys/src/libc/power/main9p.s
new file mode 100755
index 000000000..76723cc10
--- /dev/null
+++ b/sys/src/libc/power/main9p.s
@@ -0,0 +1,37 @@
+#define NPRIVATES	16
+
+TEXT	_mainp(SB), 1, $(16 + NPRIVATES*4)
+
+	MOVW	$setSB(SB), R2
+	MOVW	R3, _tos(SB)
+
+	MOVW	$p-64(SP), R4
+	MOVW	R4, _privates+0(SB)
+	MOVW	$16, R4
+	MOVW	R4, _nprivates+0(SB)
+
+	BL	_profmain(SB)
+	MOVW	_tos(SB), R3
+	MOVW	4(R3), R4
+	MOVW	R4, 0(R3)
+	MOVW	inargc-4(FP), R3
+	MOVW	$inargv+0(FP), R4
+	MOVW	R3, 4(R1)
+	MOVW	R4, 8(R1)
+	BL	main(SB)
+loop:
+	MOVW	$exits<>(SB), R3
+	MOVW	R3, 4(R1)
+	BL	exits(SB)
+	MOVW	$_profin(SB), R3	/* force loading of profile */
+	BR	loop
+
+TEXT	_savearg(SB), 1, $0
+	RETURN
+
+TEXT	_callpc(SB), 1, $0
+	MOVW	argp-4(FP), R3
+	RETURN
+
+DATA	exits<>+0(SB)/4, $"main"
+GLOBL	exits<>+0(SB), $5
diff --git a/sys/src/libc/power/memccpy.s b/sys/src/libc/power/memccpy.s
new file mode 100755
index 000000000..4a4a34449
--- /dev/null
+++ b/sys/src/libc/power/memccpy.s
@@ -0,0 +1,23 @@
+	TEXT	memccpy(SB), $0
+#define	BDNZ	BC	16,0,
+	MOVW	R3, s1+0(FP)
+	MOVW	n+12(FP), R7
+	MOVW	s2+4(FP), R4
+	MOVBZ	c+11(FP), R5
+	CMP	R7, $0
+	BEQ	nf
+	MOVW	R7, CTR
+	SUB	$1, R3
+	SUB	$1, R4
+l1:
+	MOVBZU	1(R4), R6
+	CMP	R6, R5
+	MOVBZU	R6, 1(R3)
+	BEQ	eq
+	BDNZ	l1
+nf:
+	MOVW	$0, R3
+	RETURN
+eq:
+	ADD	$1, R3
+	RETURN
diff --git a/sys/src/libc/power/memcmp.s b/sys/src/libc/power/memcmp.s
new file mode 100755
index 000000000..f524fa9d3
--- /dev/null
+++ b/sys/src/libc/power/memcmp.s
@@ -0,0 +1,110 @@
+	TEXT	memcmp(SB), $0
+#define	BDNZ	BC	16,0,
+	MOVW	R3, s1+0(FP)	/* R3 is pointer1 */
+
+/*
+ * performance:
+ *	67mb/sec aligned; 16mb/sec unaligned
+ */
+
+	MOVW	n+8(FP), R4	/* R4 is count */
+	MOVW	s2+4(FP), R5	/* R5 is pointer2 */
+
+/*
+ * let LSW do the work for 4 characters or less; aligned and unaligned
+ */
+	CMP	R4, $0
+	BLE	eq
+	CMP	R4, $4
+	BLE	out
+
+	XOR	R3, R5, R9
+	ANDCC	$3, R9
+	BNE	l4	/* pointers misaligned; use LSW loop */
+
+/*
+ * do enough bytes to align pointers
+ */
+	ANDCC	$3,R3, R9
+	BEQ	l2
+	SUBC	R9, $4, R9
+	MOVW	R9, XER
+	LSW	(R3), R10
+	ADD	R9, R3
+	LSW	(R5), R14
+	ADD	R9, R5
+	SUB	R9, R4
+	CMPU	R10, R14
+	BNE	ne
+
+/*
+ * compare 16 at a time
+ */
+l2:
+	SRAWCC	$4, R4, R9
+	BLE	l4
+	MOVW	R9, CTR
+	SUB	$4, R3
+	SUB	$4, R5
+l3:
+	MOVWU	4(R3), R10
+	MOVWU	4(R5), R12
+	MOVWU	4(R3), R11
+	MOVWU	4(R5), R13
+	CMPU	R10, R12
+	BNE	ne
+	MOVWU	4(R3), R10
+	MOVWU	4(R5), R12
+	CMPU	R11, R13
+	BNE	ne
+	MOVWU	4(R3), R11
+	MOVWU	4(R5), R13
+	CMPU	R10, R12
+	BNE	ne
+	CMPU	R11, R13
+	BNE	ne
+	BDNZ	l3
+	ADD	$4, R3
+	ADD	$4, R5
+	RLWNMCC	$0, R4, $15, R4	/* residue */
+	BEQ	eq
+
+/*
+ * do remaining words with LSW; also does unaligned case
+ */
+l4:
+	SRAWCC	$2, R4, R9
+	BLE	out
+	MOVW	R9, CTR
+l5:
+	LSW	(R3), $4, R10
+	ADD	$4, R3
+	LSW	(R5), $4, R11
+	ADD	$4, R5
+	CMPU	R10, R11
+	BNE	ne
+	BDNZ	l5
+	RLWNMCC	$0, R4, $3, R4	/* residue */
+	BEQ	eq
+
+/*
+ * do remaining bytes with final LSW
+ */
+out:
+	MOVW	R4, XER
+	LSW	(R3), R10
+	LSW	(R5), R11
+	CMPU	R10, R11
+	BNE	ne
+
+eq:
+	MOVW	$0, R3
+	RETURN
+
+ne:
+	MOVW	$1, R3
+	BGE	ret
+	MOVW	$-1,R3
+ret:
+	RETURN
+	END
diff --git a/sys/src/libc/power/memmove.s b/sys/src/libc/power/memmove.s
new file mode 100755
index 000000000..34c1e3c5f
--- /dev/null
+++ b/sys/src/libc/power/memmove.s
@@ -0,0 +1,170 @@
+#define	BDNZ	BC	16,0,
+	TEXT	memmove(SB), $0
+	BR	move
+
+	TEXT	memcpy(SB), $0
+move:
+
+/*
+ * performance:
+ *	(tba)
+ */
+
+	MOVW	R3, s1+0(FP)
+	MOVW	n+8(FP), R9		/* R9 is count */
+	MOVW	R3, R10			/* R10 is to-pointer */
+	CMP	R9, $0
+	BEQ	ret
+	BLT	trap
+	MOVW	s2+4(FP), R11		/* R11 is from-pointer */
+
+/*
+ * if no more than 16 bytes, just use one lsw/stsw
+ */
+	CMP	R9, $16
+	BLE	fout
+
+	ADD	R9,R11, R13		/* R13 is end from-pointer */
+	ADD	R9,R10, R12		/* R12 is end to-pointer */
+
+/*
+ * easiest test is copy backwards if
+ * destination string has higher mem address
+ */
+	CMPU	R10, R11
+	BGT	back
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+	XOR	R10,R11, R7
+	ANDCC	$3,R7
+	BNE	fbad
+
+/*
+ * move a few bytes to align pointers
+ */
+	ANDCC	$3,R10,R7
+	BEQ	f2
+	SUBC	R7, $4, R7
+	SUB	R7, R9
+	MOVW	R7, XER
+	LSW	(R11), R16
+	ADD	R7, R11
+	STSW	R16, (R10)
+	ADD	R7, R10
+
+/*
+ * turn R14 into doubleword count
+ * copy 16 bytes at a time while there's room.
+ */
+f2:
+	SRAWCC	$4, R9, R14
+	BLE	fout
+	MOVW	R14, CTR
+	SUB	$4, R11
+	SUB	$4, R10
+f3:
+	MOVWU	4(R11), R16
+	MOVWU	4(R11), R17
+	MOVWU	4(R11), R18
+	MOVWU	4(R11), R19
+	MOVWU	R16, 4(R10)
+	MOVWU	R17, 4(R10)
+	MOVWU	R18, 4(R10)
+	MOVWU	R19, 4(R10)
+	BDNZ	f3
+	RLWNMCC	$0, R9, $15, R9	/* residue */
+	BEQ	ret
+	ADD	$4, R11
+	ADD	$4, R10
+
+/*
+ * move up to 16 bytes through R16 .. R19; aligned and unaligned
+ */
+fout:
+	MOVW	R9, XER
+	LSW	(R11), R16
+	STSW	R16, (R10)
+	BR	ret
+
+/*
+ * loop for unaligned copy, then copy up to 15 remaining bytes
+ */
+fbad:
+	SRAWCC	$4, R9, R14
+	BLE	f6
+	MOVW	R14, CTR
+f5:
+	LSW	(R11), $16, R16
+	ADD	$16, R11
+	STSW	R16, $16, (R10)
+	ADD	$16, R10
+	BDNZ	f5
+	RLWNMCC	$0, R9, $15, R9	/* residue */
+	BEQ	ret
+f6:
+	MOVW	R9, XER
+	LSW	(R11), R16
+	STSW	R16, (R10)
+	BR	ret
+
+/*
+ * whole thing repeated for backwards
+ */
back:
+	CMP	R9, $4
+	BLT	bout
+
+	XOR	R12,R13, R7
+	ANDCC	$3,R7
+	BNE	bout
+b1:
+	ANDCC	$3,R13, R7
+	BEQ	b2
+	MOVBZU	-1(R13), R16
+	MOVBZU	R16, -1(R12)
+	SUB	$1, R9
+	BR	b1
+b2:
+	SRAWCC	$4, R9, R14
+	BLE	b4
+	MOVW	R14, CTR
+b3:
+	MOVWU	-4(R13), R16
+	MOVWU	-4(R13), R17
+	MOVWU	-4(R13), R18
+	MOVWU	-4(R13), R19
+	MOVWU	R16, -4(R12)
+	MOVWU	R17, -4(R12)
+	MOVWU	R18, -4(R12)
+	MOVWU	R19, -4(R12)
+	BDNZ	b3
+	RLWNMCC	$0, R9, $15, R9	/* residue */
+	BEQ	ret
+b4:
+	SRAWCC	$2, R9, R14
+	BLE	bout
+	MOVW	R14, CTR
+b5:
+	MOVWU	-4(R13), R16
+	MOVWU	R16, -4(R12)
+	BDNZ	b5
+	RLWNMCC	$0, R9, $3, R9	/* residue */
+	BEQ	ret
+
+bout:
+	CMPU	R13, R11
+	BLE	ret
+	MOVBZU	-1(R13), R16
+	MOVBZU	R16, -1(R12)
+	BR	bout
+
+trap:
+	MOVW	$0, R0
+	MOVW	0(R0), R0
+
+ret:
+	MOVW	s1+0(FP), R3
+	RETURN
diff --git a/sys/src/libc/power/memset.s b/sys/src/libc/power/memset.s
new file mode 100755
index 000000000..fa6e8d920
--- /dev/null
+++ b/sys/src/libc/power/memset.s
@@ -0,0 +1,73 @@
+	TEXT	memset(SB),$0
+#define	BDNZ	BC	16,0,
+	MOVW	R3, p+0(FP)		/* R3 is pointer */
+
+/*
+ * performance:
+ *	about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
+ *	drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
+ */
+
+	MOVW	n+8(FP), R4		/* R4 is count */
+	CMP	R4, $0
+	BLE	ret
+	MOVW	c+4(FP), R5		/* R5 is char */
+
+/*
+ * create 16 copies of c in R5 .. R8
+ */
+	RLWNM	$0, R5, $0xff, R5
+	RLWMI	$8, R5, $0xff00, R5
+	RLWMI	$16, R5, $0xffff0000, R5
+	MOVW	R5, R6
+	MOVW	R5, R7
+	MOVW	R5, R8
+
+/*
+ * let STSW do the work for 16 characters or less; aligned and unaligned
+ */
+	CMP	R4, $16
+	BLE	out
+
+/*
+ * store enough bytes to align pointer
+ */
+	ANDCC	$7,R3, R9
+	BEQ	l2
+	SUBC	R9, $8, R9
+	MOVW	R9, XER
+	STSW	R5, (R3)
+	ADD	R9, R3
+	SUB	R9, R4
+
+/*
+ * store 16 at a time while there's room
+ * STSW was used here originally, but it's `completion serialised'
+ */
+l2:
+	SRAWCC	$4, R4, R9
+	BLE	out
+	MOVW	R9, CTR
+l3:
+	MOVW	R5, 0(R3)
+	ADD	$8, R3, R10
+	MOVW	R6, 4(R3)
+	MOVW	R7, 0(R10)
+	ADD	$8, R10, R3
+	MOVW	R8, 4(R10)
+	BDNZ	l3
+	RLWNMCC	$0, R4, $15, R4	/* residue */
+	BEQ	ret
+
+/*
+ * store up to 16 bytes from R5 .. R8; aligned and unaligned
+ */
+
+out:
+	MOVW	R4, XER
+	STSW	R5, (R3)
+
+ret:
+	MOVW	0(FP), R3
+	RETURN
+	END
diff --git a/sys/src/libc/power/mkfile b/sys/src/libc/power/mkfile
new file mode 100755
index 000000000..c02f8a98d
--- /dev/null
+++ b/sys/src/libc/power/mkfile
@@ -0,0 +1,37 @@
+objtype=power
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libc.a
+SFILES=\
+	argv0.s\
+	atom.s\
+	cycles.s\
+	getcallerpc.s\
+	getfcr.s\
+	main9.s\
+	main9p.s\
+	memccpy.s\
+	memcmp.s\
+	memmove.s\
+	memset.s\
+	setjmp.s\
+	strcmp.s\
+	strncmp.s\
+	tas.s\
+	vlop.s
+
+CFILES=\
+	notejmp.c\
+	sqrt.c\
+	vlrt.c\
+
+HFILES=/sys/include/libc.h
+
+OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+	$HFILES\
+	$CFILES\
+	$SFILES\
+
+</sys/src/cmd/mksyslib
diff --git a/sys/src/libc/power/notejmp.c b/sys/src/libc/power/notejmp.c
new file mode 100755
index 000000000..5394a4d75
--- /dev/null
+++ b/sys/src/libc/power/notejmp.c
@@ -0,0 +1,22 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+
+int __noterestore(void);
+
+void
+notejmp(void *vr, jmp_buf j, int ret)
+{
+	struct Ureg *r = vr;
+
+	/*
+	 * song and dance to get around the kernel smashing r3 in noted
+	 */
+	r->r4 = ret;
+	if(ret == 0)
+		r->r4 = 1;
+	r->r5 = j[JMPBUFPC] - JMPBUFDPC;
+	r->pc = (ulong)__noterestore;
+	r->sp = j[JMPBUFSP];
+	noted(NCONT);
+}
diff --git a/sys/src/libc/power/setjmp.s b/sys/src/libc/power/setjmp.s
new file mode 100755
index 000000000..f3f2f44ae
--- /dev/null
+++ b/sys/src/libc/power/setjmp.s
@@ -0,0 +1,26 @@
+TEXT	setjmp(SB), 1, $-4
+	MOVW	LR, R4
+	MOVW	R1, (R3)
+	MOVW	R4, 4(R3)
+	MOVW	$0, R3
+	RETURN
+
+TEXT	longjmp(SB), 1, $-4
+	MOVW	R3, R4
+	MOVW	r+4(FP), R3
+	CMP	R3, $0
+	BNE	ok		/* ansi: "longjmp(0) => longjmp(1)" */
+	MOVW	$1, R3		/* bless their pointed heads */
+ok:	MOVW	(R4), R1
+	MOVW	4(R4), R4
+	MOVW	R4, LR
+	BR	(LR)
+
+/*
+ * trampoline functions because the kernel smashes r1
+ * in the uregs given to notejmp
+ */
+TEXT	__noterestore(SB), 1, $-4
+	MOVW	R4, R3
+	MOVW	R5, LR
+	BR	(LR)
diff --git a/sys/src/libc/power/sqrt.c b/sys/src/libc/power/sqrt.c
new file mode 100755
index 000000000..fa27c35ef
--- /dev/null
+++ b/sys/src/libc/power/sqrt.c
@@ -0,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+
+static	long	sqtab[64] =
+{
+	0x6cdb2, 0x726d4, 0x77ea3, 0x7d52f, 0x82a85, 0x87eb1, 0x8d1c0, 0x923bd,
+	0x974b2, 0x9c4a8, 0xa13a9, 0xa61be, 0xaaeee, 0xafb41, 0xb46bf, 0xb916e,
+	0xbdb55, 0xc247a, 0xc6ce3, 0xcb495, 0xcfb95, 0xd41ea, 0xd8796, 0xdcca0,
+	0xe110c, 0xe54dd, 0xe9818, 0xedac0, 0xf1cd9, 0xf5e67, 0xf9f6e, 0xfdfef,
+	0x01fe0, 0x05ee6, 0x09cfd, 0x0da30, 0x11687, 0x1520c, 0x18cc8, 0x1c6c1,
+	0x20000, 0x2388a, 0x27068, 0x2a79e, 0x2de32, 0x3142b, 0x3498c, 0x37e5b,
+	0x3b29d, 0x3e655, 0x41989, 0x44c3b, 0x47e70, 0x4b02b, 0x4e16f, 0x51241,
+	0x542a2, 0x57296, 0x5a220, 0x5d142, 0x60000, 0x62e5a, 0x65c55, 0x689f2,
+};
+
+double
+sqrt(double arg)
+{
+	int e, ms;
+	double a, t;
+	union
+	{
+		double	d;
+		struct
+		{
+			long	ms;
+			long	ls;
+		};
+	} u;
+
+	u.d = arg;
+	ms = u.ms;
+
+	/*
+	 * sign extend the mantissa with
+	 * exponent. result should be > 0 for
+	 * normal case.
+	 */
+	e = ms >> 20;
+	if(e <= 0) {
+		if(e == 0)
+			return 0;
+		return NaN();
+	}
+
+	/*
+	 * pick up arg/4 by adjusting exponent
+	 */
+	u.ms = ms - (2 << 20);
+	a = u.d;
+
+	/*
+	 * use 5 bits of mantissa and 1 bit
+	 * of exponent to form table index.
+	 * insert exponent/2 - 1.
+	 */
+	e = (((e - 1023) >> 1) + 1022) << 20;
+	u.ms = *(long*)((char*)sqtab + ((ms >> 13) & 0xfc)) | e;
+	u.ls = 0;
+
+	/*
+	 * three laps of newton
+	 */
+	e = 1 << 20;
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+
+	return t + a/t;
+}
+
+/*
+ * this is the program that generated the table.
+ * it calls sqrt by some other means.
+ *
+ *	void
+ *	main(void)
+ *	{
+ *		int i;
+ *		union	U
+ *		{
+ *			double	d;
+ *			struct
+ *			{
+ *				long	ms;
+ *				long	ls;
+ *			};
+ *		} u;
+ *
+ *		for(i=0; i<64; i++) {
+ *			u.ms = (i<<15) | 0x3fe04000;
+ *			u.ls = 0;
+ *			u.d = sqrt(u.d);
+ *			print(" 0x%.5lux,", u.ms & 0xfffff);
+ *		}
+ *		print("\n");
+ *		exits(0);
+ *	}
+ */
diff --git a/sys/src/libc/power/strcmp.s b/sys/src/libc/power/strcmp.s
new file mode 100755
index 000000000..0aef5b29c
--- /dev/null
+++ b/sys/src/libc/power/strcmp.s
@@ -0,0 +1,21 @@
+TEXT strcmp(SB), $0
+
+	MOVW	s2+4(FP), R4
+
+	SUB	$1, R3
+	SUB	$1, R4
+l1:
+	MOVBZU	1(R3), R5
+	MOVBZU	1(R4), R6
+	CMP	R5, R6
+	BNE	ne
+	CMP	R5, $0
+	BNE	l1
+	MOVW	$0, R3
+	RETURN
+ne:
+	MOVW	$1, R3
+	BGT	ret
+	MOVW	$-1, R3
+ret:
+	RETURN
diff --git a/sys/src/libc/power/strncmp.s b/sys/src/libc/power/strncmp.s
new file mode 100755
index 000000000..c55962faa
--- /dev/null
+++ b/sys/src/libc/power/strncmp.s
@@ -0,0 +1,29 @@
+TEXT strncmp(SB), $0
+#define	BDNZ	BC	16,0,
+
+	MOVW	s2+4(FP), R4
+	MOVW	n+8(FP), R7
+
+	CMP	R7, $0
+	MOVW	R7, CTR
+	BLE	eq
+
+	SUB	$1, R3
+	SUB	$1, R4
+l1:
+	MOVBZU	1(R3), R5
+	MOVBZU	1(R4), R6
+	CMP	R5, R6
+	BNE	ne
+	CMP	R5, $0
+	BEQ	eq
+	BDNZ	l1
+eq:
+	MOVW	$0, R3
+	RETURN
+ne:
+	MOVW	$1, R3
+	BGT	ret
+	MOVW	$-1, R3
+ret:
+	RETURN
diff --git a/sys/src/libc/power/tas.s b/sys/src/libc/power/tas.s
new file mode 100755
index 000000000..246b18056
--- /dev/null
+++ b/sys/src/libc/power/tas.s
@@ -0,0 +1,14 @@
+TEXT	_tas(SB), 1, $-4
+	MOVW	R3, R4
+	MOVW	$0xdeaddead,R5
+tas1:
+/*	DCBF	(R4)	fix for 603x bug */
+	SYNC
+	LWAR	(R4), R3
+	CMP	R3, $0
+	BNE	tas0
+	DCBT	(R4)		/* fix 405 errata cpu_210 */
+	STWCCC	R5, (R4)
+	BNE	tas1
+tas0:
+	RETURN
diff --git a/sys/src/libc/power/vlop.s b/sys/src/libc/power/vlop.s
new file mode 100755
index 000000000..9085da247
--- /dev/null
+++ b/sys/src/libc/power/vlop.s
@@ -0,0 +1,132 @@
+#define	BDNZ	BC	16,0,
+
+/*
+ * 64/64 division adapted from powerpc compiler writer's handbook
+ *
+ * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
+ *	quo	dvd	dvs
+ *
+ * Remainder is left in R7:R8
+ *
+ * Code comment notation:
+ *	msw = most-significant (high-order) word, i.e. bits 0..31
+ *	lsw = least-significant (low-order) word, i.e. bits 32..63
+ *	LZ = Leading Zeroes
+ *	SD = Significant Digits
+ *
+ * R3:R4 = dvd (input dividend); quo (output quotient)
+ * R5:R6 = dvs (input divisor)
+ *
+ * R7:R8 = tmp; rem (output remainder)
+ */
+
+TEXT	_divu64(SB), $0
+	MOVW	a+0(FP), R3
+	MOVW	a+4(FP), R4
+	MOVW	b+8(FP), R5
+	MOVW	b+12(FP), R6
+
+	/* count the number of leading 0s in the dividend */
+	CMP	R3, $0		/* dvd.msw == 0? */
+	CNTLZW	R3, R11		/* R11 = dvd.msw.LZ */
+	CNTLZW	R4, R9		/* R9 = dvd.lsw.LZ */
+	BNE	lab1		/* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
+	ADD	$32, R9, R11	/* dvd.LZ = dvd.lsw.LZ + 32 */
+
+lab1:
+	/* count the number of leading 0s in the divisor */
+	CMP	R5, $0		/* dvd.msw == 0? */
+	CNTLZW	R5, R9		/* R9 = dvs.msw.LZ */
+	CNTLZW	R6, R10		/* R10 = dvs.lsw.LZ */
+	BNE	lab2		/* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
+	ADD	$32, R10, R9	/* dvs.LZ = dvs.lsw.LZ + 32 */
+
+lab2:
+	/* determine shift amounts to minimize the number of iterations */
+	CMP	R11, R9		/* compare dvd.LZ to dvs.LZ */
+	SUBC	R11, $64, R10	/* R10 = dvd.SD */
+	BGT	lab9		/* if(dvs > dvd) quotient = 0 */
+	ADD	$1, R9		/* ++dvs.LZ (or --dvs.SD) */
+	SUBC	R9, $64, R9	/* R9 = dvs.SD */
+	ADD	R9, R11		/* (dvd.LZ + dvs.SD) = left shift of dvd for */
+				/* initial dvd */
+	SUB	R9, R10, R9	/* (dvd.SD - dvs.SD) = right shift of dvd for */
+				/* initial tmp */
+	MOVW	R9, CTR		/* number of iterations = dvd.SD - dvs.SD */
+
+	/* R7:R8 = R3:R4 >> R9 */
+	CMP	R9, $32
+	ADD	$-32, R9, R7
+	BLT	lab3		/* if(R9 < 32) jump to lab3 */
+	SRW	R7, R3, R8	/* tmp.lsw = dvd.msw >> (R9 - 32) */
+	MOVW	$0, R7		/* tmp.msw = 0 */
+	BR	lab4
+lab3:
+	SRW	R9, R4, R8	/* R8 = dvd.lsw >> R9 */
+	SUBC	R9, $32, R7
+	SLW	R7, R3, R7	/* R7 = dvd.msw << 32 - R9 */
+	OR	R7, R8		/* tmp.lsw = R8 | R7 */
+	SRW	R9, R3, R7	/* tmp.msw = dvd.msw >> R9 */
+
+lab4:
+	/* R3:R4 = R3:R4 << R11 */
+	CMP	R11,$32
+	ADDC	$-32, R11, R9
+	BLT	lab5		/* (R11 < 32)? */
+	SLW	R9, R4, R3	/* dvd.msw = dvs.lsw << R9 */
+	MOVW	$0, R4		/* dvd.lsw = 0 */
+	BR	lab6
+
+lab5:
+	SLW	R11, R3		/* R3 = dvd.msw << R11 */
+	SUBC	R11, $32, R9
+	SRW	R9, R4, R9	/* R9 = dvd.lsw >> 32 - R11 */
+	OR	R9, R3		/* dvd.msw = R3 | R9 */
+	SLW	R11, R4		/* dvd.lsw = dvd.lsw << R11 */
+
+lab6:
+	/* restoring division shift and subtract loop */
+	MOVW	$-1, R10
+	ADDC	$0, R7		/* clear carry bit before loop starts */
+lab7:
+	/* tmp:dvd is considered one large register */
+	/* each portion is shifted left 1 bit by adding it to itself */
+	/* adde sums the carry from the previous and creates a new carry */
+	ADDE	R4,R4		/* shift dvd.lsw left 1 bit */
+	ADDE	R3,R3		/* shift dvd.msw to left 1 bit */
+	ADDE	R8,R8		/* shift tmp.lsw to left 1 bit */
+	ADDE	R7,R7		/* shift tmp.msw to left 1 bit */
+	SUBC	R6, R8, R11	/* tmp.lsw - dvs.lsw */
+	SUBECC	R5, R7, R9	/* tmp.msw - dvs.msw */
+	BLT	lab8		/* if(result < 0) clear carry bit */
+	MOVW	R11, R8		/* move lsw */
+	MOVW	R9, R7		/* move msw */
+	ADDC	$1, R10, R11	/* set carry bit */
+lab8:
+	BDNZ	lab7
+
+	ADDE	R4,R4		/* quo.lsw (lsb = CA) */
+	ADDE	R3,R3		/* quo.msw (lsb from lsw) */
+
+lab10:
+	MOVW	qp+16(FP), R9
+	MOVW	rp+20(FP), R10
+	CMP	R9, $0
+	BEQ	lab11
+	MOVW	R3, 0(R9)
+	MOVW	R4, 4(R9)
+lab11:
+	CMP	R10, $0
+	BEQ	lab12
+	MOVW	R7, 0(R10)
+	MOVW	R8, 4(R10)
+lab12:
+	RETURN
+
+lab9:
+	/* Quotient is 0 (dvs > dvd) */
+	MOVW	R4, R8		/* rmd.lsw = dvd.lsw */
+	MOVW	R3, R7		/* rmd.msw = dvd.msw */
+	MOVW	$0, R4		/* dvd.lsw = 0 */
+	MOVW	$0, R3		/* dvd.msw = 0 */
+	BR	lab10
diff --git a/sys/src/libc/power/vlrt.c b/sys/src/libc/power/vlrt.c
new file mode 100755
index 000000000..681a3b49b
--- /dev/null
+++ b/sys/src/libc/power/vlrt.c
@@ -0,0 +1,254 @@
+typedef	unsigned long	ulong;
+typedef	unsigned int	uint;
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef	signed char	schar;
+
+#define	SIGN(n)	(1UL<<(n-1))
+
+typedef	struct	Vlong	Vlong;
+struct	Vlong
+{
+	ulong	hi;
+	ulong	lo;
+};
+
+void	abort(void);
+void	_divu64(Vlong, Vlong, Vlong*, Vlong*);
+
+void
+_d2v(Vlong *y, double d)
+{
+	union { double d; Vlong; } x;
+	ulong xhi, xlo, ylo, yhi;
+	int sh;
+
+	x.d = d;
+
+	xhi = (x.hi & 0xfffff) | 0x100000;
+	xlo = x.lo;
+	sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		if(sh <= 10) {
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;	/* causes something awful */
+		}
+	}
+	if(x.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y->hi = yhi;
+	y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+
+	_d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+	if(x.hi & SIGN(32)) {
+		if(x.lo) {
+			x.lo = -x.lo;
+			x.hi = ~x.hi;
+		} else
+			x.hi = -x.hi;
+		return -((long)x.hi*4294967296. + x.lo);
+	}
+	return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+	return _v2d(x);
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		q->hi = 0;
+		q->lo = n.lo / d.lo;
+		return;
+	}
+	_divu64(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		r->hi = 0;
+		r->lo = n.lo % d.lo;
+		return;
+	}
+	_divu64(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+	if(v->lo == 0) {
+		v->hi = -v->hi;
+		return;
+	}
+	v->lo = -v->lo;
+	v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		q->lo = (long)n.lo / (long)d.lo;
+		q->hi = ((long)q->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	_divu64(n, d, q, 0);
+	if(nneg != dneg)
+		vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		r->lo = (long)n.lo % (long)d.lo;
+		r->hi = ((long)r->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	_divu64(n, d, 0, r);
+	if(nneg)
+		vneg(r);
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u = *ret;
+	switch(type) {
+	default:
+		abort();
+		break;
+
+	case 1:	/* schar */
+		t.lo = *(schar*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2:	/* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3:	/* short */
+		t.lo = *(short*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4:	/* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9:	/* int */
+		t.lo = *(int*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5:	/* long */
+		t.lo = *(long*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6:	/* ulong */
+		t.lo = *(ulong*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ulong*)lv = u.lo;
+		break;
+
+	case 7:	/* vlong */
+	case 8:	/* uvlong */
+		fn(&u, *(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	*ret = u;
+}
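
A note on the atomic operations in atom.s above: ainc, adec, storecond, and the cas family all follow the PowerPC load-reserved/store-conditional pattern (LWAR/STWCCC), retrying until the conditional store succeeds. As a usage illustration only (not part of this import), here is a minimal Plan 9 C sketch built on the cas prototype quoted in the atom.s comment block; incref_sat is a hypothetical helper name, not a libc interface.

#include <u.h>
#include <libc.h>

int	cas(uint *p, int ov, int nv);	/* prototype as given in atom.s */

/*
 * hypothetical helper: atomically increment *p unless it has
 * reached max; retries when cas loses the reservation, just
 * like the BNE retry loops in atom.s.
 */
static int
incref_sat(uint *p, uint max)
{
	uint ov;

	for(;;){
		ov = *p;
		if(ov >= max)
			return 0;	/* saturated; not incremented */
		if(cas(p, ov, ov+1))
			return 1;	/* conditional store succeeded */
		/* another processor changed *p; try again */
	}
}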
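
Likewise for vlop.s: _divu64 is the restoring shift-and-subtract division from the PowerPC compiler writer's handbook, treating tmp:dvd as one 128-bit register that shifts left one bit per iteration while quotient bits enter at the low end. Below is a C model of that loop, illustrative only: it assumes 32-bit words (vlrt.c's ulong), runs all 64 iterations instead of the dvd.SD - dvs.SD iterations the assembly computes, and, like the assembly, leaves a zero divisor undefined.

typedef unsigned int u32;	/* 32-bit word; `ulong' in vlrt.c */
typedef struct { u32 hi; u32 lo; } Vlong;

static void
divu64model(Vlong dvd, Vlong dvs, Vlong *quo, Vlong *rem)
{
	Vlong tmp;
	int i, bit, over;

	tmp.hi = 0;
	tmp.lo = 0;
	for(i = 0; i < 64; i++){
		over = tmp.hi >> 31;	/* 65th bit of tmp after the shift */

		/* shift tmp:dvd left one bit, as the four ADDEs do */
		tmp.hi = (tmp.hi << 1) | (tmp.lo >> 31);
		tmp.lo = (tmp.lo << 1) | (dvd.hi >> 31);
		dvd.hi = (dvd.hi << 1) | (dvd.lo >> 31);
		dvd.lo = dvd.lo << 1;

		/* trial subtract tmp - dvs (the SUBC/SUBECC pair); keep it unless negative */
		bit = over || tmp.hi > dvs.hi ||
			(tmp.hi == dvs.hi && tmp.lo >= dvs.lo);
		if(bit){
			if(tmp.lo < dvs.lo)
				tmp.hi--;	/* borrow from the high word */
			tmp.lo -= dvs.lo;
			tmp.hi -= dvs.hi;
		}
		dvd.lo |= bit;		/* quotient bit enters at the low end */
	}
	if(quo)
		*quo = dvd;	/* the dividend register has become the quotient */
	if(rem)
		*rem = tmp;	/* tmp holds the remainder */
}

For example, dividing {0, 7} by {0, 2} this way leaves quo = {0, 3} and rem = {0, 1}, matching what _divvu and _modvu in vlrt.c produce through _divu64.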