author	Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
committer	Taru Karttunen <taruti@taruti.net>	2011-03-30 15:46:40 +0300
commit	e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree	d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/libc/power
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/libc/power')
-rwxr-xr-x	sys/src/libc/power/argv0.s	4
-rwxr-xr-x	sys/src/libc/power/atom.s	65
-rwxr-xr-x	sys/src/libc/power/cycles.s	17
-rwxr-xr-x	sys/src/libc/power/getcallerpc.s	4
-rwxr-xr-x	sys/src/libc/power/getfcr.s	28
-rwxr-xr-x	sys/src/libc/power/main9.s	25
-rwxr-xr-x	sys/src/libc/power/main9p.s	37
-rwxr-xr-x	sys/src/libc/power/memccpy.s	23
-rwxr-xr-x	sys/src/libc/power/memcmp.s	110
-rwxr-xr-x	sys/src/libc/power/memmove.s	170
-rwxr-xr-x	sys/src/libc/power/memset.s	73
-rwxr-xr-x	sys/src/libc/power/mkfile	37
-rwxr-xr-x	sys/src/libc/power/notejmp.c	22
-rwxr-xr-x	sys/src/libc/power/setjmp.s	26
-rwxr-xr-x	sys/src/libc/power/sqrt.c	103
-rwxr-xr-x	sys/src/libc/power/strcmp.s	21
-rwxr-xr-x	sys/src/libc/power/strncmp.s	29
-rwxr-xr-x	sys/src/libc/power/tas.s	14
-rwxr-xr-x	sys/src/libc/power/vlop.s	132
-rwxr-xr-x	sys/src/libc/power/vlrt.c	254
20 files changed, 1194 insertions, 0 deletions
diff --git a/sys/src/libc/power/argv0.s b/sys/src/libc/power/argv0.s
new file mode 100755
index 000000000..8d9f9b29b
--- /dev/null
+++ b/sys/src/libc/power/argv0.s
@@ -0,0 +1,4 @@
+GLOBL argv0(SB), $4
+GLOBL _tos(SB), $4
+GLOBL _privates(SB), $4
+GLOBL _nprivates(SB), $4
diff --git a/sys/src/libc/power/atom.s b/sys/src/libc/power/atom.s
new file mode 100755
index 000000000..86776e6ed
--- /dev/null
+++ b/sys/src/libc/power/atom.s
@@ -0,0 +1,65 @@
+TEXT ainc(SB),$0 /* long ainc(long *); */
+ MOVW R3, R4
+xincloop:
+ LWAR (R4), R3
+ ADD $1, R3
+ DCBT (R4) /* fix 405 errata cpu_210 */
+ STWCCC R3, (R4)
+ BNE xincloop
+ RETURN
+
+TEXT adec(SB),$0 /* long adec(long *); */
+ MOVW R3, R4
+xdecloop:
+ LWAR (R4), R3
+ ADD $-1, R3
+ DCBT (R4) /* fix 405 errata cpu_210 */
+ STWCCC R3, (R4)
+ BNE xdecloop
+ RETURN
+
+TEXT loadlink(SB), $0
+
+ LWAR (R3), R3
+ RETURN
+
+TEXT storecond(SB), $0
+
+ MOVW val+4(FP), R4
+ DCBT (R3) /* fix 405 errata cpu_210 */
+ STWCCC R4, (R3)
+ BNE storecondfail
+ MOVW $1, R3
+ RETURN
+storecondfail:
+ MOVW $0, R3
+ RETURN
+
+/*
+ * int cas32(u32int *p, u32int ov, u32int nv);
+ * int cas(uint *p, int ov, int nv);
+ * int casp(void **p, void *ov, void *nv);
+ * int casl(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT cas32+0(SB),0,$0
+TEXT cas+0(SB),0,$0
+TEXT casp+0(SB),0,$0
+TEXT casl+0(SB),0,$0
+ MOVW ov+4(FP),R4
+ MOVW nv+8(FP),R8
+ LWAR (R3),R5
+ CMP R5,R4
+ BNE fail
+ DCBT (R3) /* fix 405 errata cpu_210 */
+ STWCCC R8,(R3)
+ BNE fail1
+ MOVW $1,R3
+ RETURN
+fail:
+ DCBT (R3) /* fix 405 errata cpu_210 */
+ STWCCC R5,(R3) /* give up exclusive access */
+fail1:
+ MOVW R0,R3
+ RETURN
+ END
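
Note: the cas32/cas/casp/casl entry points above share one body; on this
32-bit port all four prototypes are the same word-sized compare-and-swap.
A portable sketch of the contract they provide (the LWAR/STWCCC pair makes
the whole step atomic in the assembly; the C below is illustrative only
and is not atomic as written):

typedef unsigned long ulong;

int
cas_sketch(ulong *p, ulong ov, ulong nv)
{
	/* atomically: if *p still holds ov, store nv and report success */
	if(*p == ov){
		*p = nv;
		return 1;
	}
	return 0;	/* another writer got in first */
}
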
diff --git a/sys/src/libc/power/cycles.s b/sys/src/libc/power/cycles.s
new file mode 100755
index 000000000..441171136
--- /dev/null
+++ b/sys/src/libc/power/cycles.s
@@ -0,0 +1,17 @@
+#define TBRL 268
+#define TBRU 269 /* Time base Upper/Lower (Reading) */
+
+/*
+ * time stamp counter; _cycles since power up
+ * Runs at fasthz/4 cycles per second (m->clkin>>3)
+ */
+TEXT cycles(SB),1,$0
+loop:
+ MOVW SPR(TBRU),R7
+ MOVW SPR(TBRL),R8
+ MOVW SPR(TBRU),R5
+ CMP R5,R7
+ BNE loop
+ MOVW R7,0(R3)
+ MOVW R8,4(R3)
+ RETURN
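
Note: cycles() must read a 64-bit time base with two 32-bit moves, so it
samples TBU, then TBL, then TBU again, and retries if the upper word
changed (meaning TBL wrapped between the reads). A C sketch of that retry
loop; rdtbu and rdtbl are hypothetical stand-ins for the
MOVW SPR(TBRU)/SPR(TBRL) instructions:

typedef unsigned long ulong;
typedef unsigned long long uvlong;

ulong rdtbu(void);	/* read time base upper (SPR 269) */
ulong rdtbl(void);	/* read time base lower (SPR 268) */

uvlong
cycles_sketch(void)
{
	ulong hi, lo;

	do{
		hi = rdtbu();
		lo = rdtbl();
	}while(rdtbu() != hi);	/* TBL wrapped during the read; sample again */
	return ((uvlong)hi<<32) | lo;
}
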
diff --git a/sys/src/libc/power/getcallerpc.s b/sys/src/libc/power/getcallerpc.s
new file mode 100755
index 000000000..62c3ee233
--- /dev/null
+++ b/sys/src/libc/power/getcallerpc.s
@@ -0,0 +1,4 @@
+TEXT getcallerpc(SB),1,$-4
+ MOVW 0(R1), R3
+ RETURN
+
diff --git a/sys/src/libc/power/getfcr.s b/sys/src/libc/power/getfcr.s
new file mode 100755
index 000000000..b61d52e68
--- /dev/null
+++ b/sys/src/libc/power/getfcr.s
@@ -0,0 +1,28 @@
+TEXT getfcr(SB), $8
+ MOVFL FPSCR, F3
+ FMOVD F3, f-8(SP)
+ MOVW -4(SP), R3
+ RETURN
+
+TEXT getfsr(SB), $8
+ MOVFL FPSCR, F3
+ FMOVD F3, f-8(SP)
+ MOVW -4(SP), R3
+ RETURN
+
+TEXT setfcr(SB), $8
+ SYNC
+ MOVW R3, -4(SP)
+ FMOVD -8(SP), F3
+ MOVFL F3, FPSCR
+ ISYNC
+ RETURN
+
+TEXT setfsr(SB), $8
+ SYNC
+ MOVW R3, -4(SP)
+ FMOVD -8(SP), F3
+ MOVFL F3, FPSCR
+ ISYNC
+ RETURN
+
diff --git a/sys/src/libc/power/main9.s b/sys/src/libc/power/main9.s
new file mode 100755
index 000000000..46085adf5
--- /dev/null
+++ b/sys/src/libc/power/main9.s
@@ -0,0 +1,25 @@
+#define NPRIVATES 16
+
+TEXT _main(SB), 1, $(16 + NPRIVATES*4)
+
+ MOVW $setSB(SB), R2
+ MOVW R3, _tos(SB)
+
+ MOVW $p-64(SP), R4
+ MOVW R4, _privates+0(SB)
+ MOVW $16, R4
+ MOVW R4, _nprivates+0(SB)
+
+ MOVW inargc-4(FP), R3
+ MOVW $inargv+0(FP), R4
+ MOVW R3, 4(R1)
+ MOVW R4, 8(R1)
+ BL main(SB)
+loop:
+ MOVW $_exitstr<>(SB), R3
+ MOVW R3, 4(R1)
+ BL exits(SB)
+ BR loop
+
+DATA _exitstr<>+0(SB)/4, $"main"
+GLOBL _exitstr<>+0(SB), $5
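
Note: _main above establishes the static base (R2), records the
kernel-passed _tos pointer, reserves NPRIVATES words of per-process
private storage on the stack, then calls main with argc/argv; if main
returns, it loops on exits("main"). A rough C-level sketch of that
control flow (the SB, _tos, and stack bookkeeping cannot actually be
expressed in C):

void main(int, char*[]);
void exits(char*);

void
_main_sketch(int argc, char *argv[])
{
	main(argc, argv);
	for(;;)
		exits("main");	/* exits should not return; loop if it does */
}
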
diff --git a/sys/src/libc/power/main9p.s b/sys/src/libc/power/main9p.s
new file mode 100755
index 000000000..76723cc10
--- /dev/null
+++ b/sys/src/libc/power/main9p.s
@@ -0,0 +1,37 @@
+#define NPRIVATES 16
+
+TEXT _mainp(SB), 1, $(16 + NPRIVATES*4)
+
+ MOVW $setSB(SB), R2
+ MOVW R3, _tos(SB)
+
+ MOVW $p-64(SP), R4
+ MOVW R4, _privates+0(SB)
+ MOVW $16, R4
+ MOVW R4, _nprivates+0(SB)
+
+ BL _profmain(SB)
+ MOVW _tos(SB), R3
+ MOVW 4(R3), R4
+ MOVW R4, 0(R3)
+ MOVW inargc-4(FP), R3
+ MOVW $inargv+0(FP), R4
+ MOVW R3, 4(R1)
+ MOVW R4, 8(R1)
+ BL main(SB)
+loop:
+ MOVW $exits<>(SB), R3
+ MOVW R3, 4(R1)
+ BL exits(SB)
+ MOVW $_profin(SB), R3 /* force loading of profile */
+ BR loop
+
+TEXT _savearg(SB), 1, $0
+ RETURN
+
+TEXT _callpc(SB), 1, $0
+ MOVW argp-4(FP), R3
+ RETURN
+
+DATA exits<>+0(SB)/4, $"main"
+GLOBL exits<>+0(SB), $5
diff --git a/sys/src/libc/power/memccpy.s b/sys/src/libc/power/memccpy.s
new file mode 100755
index 000000000..4a4a34449
--- /dev/null
+++ b/sys/src/libc/power/memccpy.s
@@ -0,0 +1,23 @@
+ TEXT memccpy(SB), $0
+#define BDNZ BC 16,0,
+ MOVW R3, s1+0(FP)
+ MOVW n+12(FP), R7
+ MOVW s2+4(FP), R4
+ MOVBZ c+11(FP), R5
+ CMP R7, $0
+ BEQ nf
+ MOVW R7, CTR
+ SUB $1, R3
+ SUB $1, R4
+l1:
+ MOVBZU 1(R4), R6
+ CMP R6, R5
+ MOVBZU R6, 1(R3)
+ BEQ eq
+ BDNZ l1
+nf:
+ MOVW $0, R3
+ RETURN
+eq:
+ ADD $1, R3
+ RETURN
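
Note: the contract the memccpy loop above implements is: copy at most n
bytes from s2 to s1, stopping after the first byte equal to c; on a hit,
return the address one past the copied c in s1, else return 0. A plain C
sketch of the same logic:

typedef unsigned char uchar;
typedef unsigned long ulong;

void*
memccpy_sketch(void *a1, void *a2, int c, ulong n)
{
	uchar *s1 = a1, *s2 = a2;

	while(n-- > 0)
		if((*s1++ = *s2++) == (uchar)c)
			return s1;	/* one past the stored c */
	return 0;	/* c not found within n bytes */
}
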
diff --git a/sys/src/libc/power/memcmp.s b/sys/src/libc/power/memcmp.s
new file mode 100755
index 000000000..f524fa9d3
--- /dev/null
+++ b/sys/src/libc/power/memcmp.s
@@ -0,0 +1,110 @@
+ TEXT memcmp(SB), $0
+#define BDNZ BC 16,0,
+ MOVW R3, s1+0(FP) /* R3 is pointer1 */
+
+/*
+ * performance:
+ * 67mb/sec aligned; 16mb/sec unaligned
+ */
+
+ MOVW n+8(FP), R4 /* R4 is count */
+ MOVW s2+4(FP), R5 /* R5 is pointer2 */
+
+/*
+ * let LSW do the work for 4 characters or less; aligned and unaligned
+ */
+ CMP R4, $0
+ BLE eq
+ CMP R4, $4
+ BLE out
+
+ XOR R3, R5, R9
+ ANDCC $3, R9
+ BNE l4 /* pointers misaligned; use LSW loop */
+
+/*
+ * do enough bytes to align pointers
+ */
+ ANDCC $3,R3, R9
+ BEQ l2
+ SUBC R9, $4, R9
+ MOVW R9, XER
+ LSW (R3), R10
+ ADD R9, R3
+ LSW (R5), R14
+ ADD R9, R5
+ SUB R9, R4
+ CMPU R10, R14
+ BNE ne
+
+/*
+ * compare 16 at a time
+ */
+l2:
+ SRAWCC $4, R4, R9
+ BLE l4
+ MOVW R9, CTR
+ SUB $4, R3
+ SUB $4, R5
+l3:
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ MOVWU 4(R3), R10
+ MOVWU 4(R5), R12
+ CMPU R11, R13
+ BNE ne
+ MOVWU 4(R3), R11
+ MOVWU 4(R5), R13
+ CMPU R10, R12
+ BNE ne
+ CMPU R11, R13
+ BNE ne
+ BDNZ l3
+ ADD $4, R3
+ ADD $4, R5
+ RLWNMCC $0, R4, $15, R4 /* residue */
+ BEQ eq
+
+/*
+ * do remaining words with LSW; also does unaligned case
+ */
+l4:
+ SRAWCC $2, R4, R9
+ BLE out
+ MOVW R9, CTR
+l5:
+ LSW (R3), $4, R10
+ ADD $4, R3
+ LSW (R5), $4, R11
+ ADD $4, R5
+ CMPU R10, R11
+ BNE ne
+ BDNZ l5
+ RLWNMCC $0, R4, $3, R4 /* residue */
+ BEQ eq
+
+/*
+ * do remaining bytes with final LSW
+ */
+out:
+ MOVW R4, XER
+ LSW (R3), R10
+ LSW (R5), R11
+ CMPU R10, R11
+ BNE ne
+
+eq:
+ MOVW $0, R3
+ RETURN
+
+ne:
+ MOVW $1, R3
+ BGE ret
+ MOVW $-1,R3
+ret:
+ RETURN
+ END
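
Note: the structure above is: if the two pointers are mutually
word-aligned, compare a few bytes to reach alignment, then four words per
loop lap, and let the LSW string instructions mop up the residue and the
fully unaligned case. A C sketch of the same strategy, with byte compares
standing in for the LSW paths:

typedef unsigned char uchar;
typedef unsigned long ulong;

int
memcmp_sketch(void *a1, void *a2, ulong n)
{
	uchar *s1 = a1, *s2 = a2;

	/* word-at-a-time only works if both share the same alignment */
	if((((ulong)s1 ^ (ulong)s2) & 3) == 0){
		while(n > 0 && ((ulong)s1 & 3)){	/* align the pointers */
			if(*s1 != *s2)
				goto differ;
			s1++, s2++, n--;
		}
		while(n >= 4 && *(ulong*)s1 == *(ulong*)s2){
			s1 += 4, s2 += 4, n -= 4;
		}
		/* on a word mismatch the byte loop below orders it */
	}
	while(n-- > 0){
		if(*s1 != *s2)
			goto differ;
		s1++, s2++;
	}
	return 0;
differ:
	return *s1 > *s2? 1 : -1;
}
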
diff --git a/sys/src/libc/power/memmove.s b/sys/src/libc/power/memmove.s
new file mode 100755
index 000000000..34c1e3c5f
--- /dev/null
+++ b/sys/src/libc/power/memmove.s
@@ -0,0 +1,170 @@
+#define BDNZ BC 16,0,
+ TEXT memmove(SB), $0
+ BR move
+
+ TEXT memcpy(SB), $0
+move:
+
+/*
+ * performance:
+ * (tba)
+ */
+
+ MOVW R3, s1+0(FP)
+ MOVW n+8(FP), R9 /* R9 is count */
+ MOVW R3, R10 /* R10 is to-pointer */
+ CMP R9, $0
+ BEQ ret
+ BLT trap
+ MOVW s2+4(FP), R11 /* R11 is from-pointer */
+
+/*
+ * if no more than 16 bytes, just use one lsw/stsw
+ */
+ CMP R9, $16
+ BLE fout
+
+ ADD R9,R11, R13 /* R13 is end from-pointer */
+ ADD R9,R10, R12 /* R12 is end to-pointer */
+
+/*
+ * easiest test is copy backwards if
+ * destination string has higher mem address
+ */
+ CMPU R10, R11
+ BGT back
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+ XOR R10,R11, R7
+ ANDCC $3,R7
+ BNE fbad
+
+/*
+ * move a few bytes to align pointers
+ */
+ ANDCC $3,R10,R7
+ BEQ f2
+ SUBC R7, $4, R7
+ SUB R7, R9
+ MOVW R7, XER
+ LSW (R11), R16
+ ADD R7, R11
+ STSW R16, (R10)
+ ADD R7, R10
+
+/*
+ * turn R14 into doubleword count
+ * copy 16 bytes at a time while there's room.
+ */
+f2:
+ SRAWCC $4, R9, R14
+ BLE fout
+ MOVW R14, CTR
+ SUB $4, R11
+ SUB $4, R10
+f3:
+ MOVWU 4(R11), R16
+ MOVWU 4(R11), R17
+ MOVWU 4(R11), R18
+ MOVWU 4(R11), R19
+ MOVWU R16, 4(R10)
+ MOVWU R17, 4(R10)
+ MOVWU R18, 4(R10)
+ MOVWU R19, 4(R10)
+ BDNZ f3
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+ ADD $4, R11
+ ADD $4, R10
+
+/*
+ * move up to 16 bytes through R16 .. R19; aligned and unaligned
+ */
+fout:
+ MOVW R9, XER
+ LSW (R11), R16
+ STSW R16, (R10)
+ BR ret
+
+/*
+ * loop for unaligned copy, then copy up to 15 remaining bytes
+ */
+fbad:
+ SRAWCC $4, R9, R14
+ BLE f6
+ MOVW R14, CTR
+f5:
+ LSW (R11), $16, R16
+ ADD $16, R11
+ STSW R16, $16, (R10)
+ ADD $16, R10
+ BDNZ f5
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+f6:
+ MOVW R9, XER
+ LSW (R11), R16
+ STSW R16, (R10)
+ BR ret
+
+/*
+ * whole thing repeated for backwards
+ */
+back:
+ CMP R9, $4
+ BLT bout
+
+ XOR R12,R13, R7
+ ANDCC $3,R7
+ BNE bout
+b1:
+ ANDCC $3,R13, R7
+ BEQ b2
+ MOVBZU -1(R13), R16
+ MOVBZU R16, -1(R12)
+ SUB $1, R9
+ BR b1
+b2:
+ SRAWCC $4, R9, R14
+ BLE b4
+ MOVW R14, CTR
+b3:
+ MOVWU -4(R13), R16
+ MOVWU -4(R13), R17
+ MOVWU -4(R13), R18
+ MOVWU -4(R13), R19
+ MOVWU R16, -4(R12)
+ MOVWU R17, -4(R12)
+ MOVWU R18, -4(R12)
+ MOVWU R19, -4(R12)
+ BDNZ b3
+ RLWNMCC $0, R9, $15, R9 /* residue */
+ BEQ ret
+b4:
+ SRAWCC $2, R9, R14
+ BLE bout
+ MOVW R14, CTR
+b5:
+ MOVWU -4(R13), R16
+ MOVWU R16, -4(R12)
+ BDNZ b5
+ RLWNMCC $0, R9, $3, R9 /* residue */
+ BEQ ret
+
+bout:
+ CMPU R13, R11
+ BLE ret
+ MOVBZU -1(R13), R16
+ MOVBZU R16, -1(R12)
+ BR bout
+
+trap:
+ MOVW $0, R0
+ MOVW 0(R0), R0
+
+ret:
+ MOVW s1+0(FP), R3
+ RETURN
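
Note: the direction test above ("copy backwards if destination string has
higher mem address") is what makes overlapping moves safe; memcpy simply
branches to the same code. A C sketch of the policy, without the
word-alignment and 16-byte unrolling (the assembly traps on a negative
count; here the count is simply unsigned):

typedef unsigned char uchar;
typedef unsigned long ulong;

void*
memmove_sketch(void *a1, void *a2, ulong n)
{
	uchar *to = a1, *from = a2;

	if(to <= from){
		while(n-- > 0)	/* forward: each byte is read before a write reaches it */
			*to++ = *from++;
	}else{
		to += n;
		from += n;
		while(n-- > 0)	/* backward: same guarantee, from the top down */
			*--to = *--from;
	}
	return a1;
}
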
diff --git a/sys/src/libc/power/memset.s b/sys/src/libc/power/memset.s
new file mode 100755
index 000000000..fa6e8d920
--- /dev/null
+++ b/sys/src/libc/power/memset.s
@@ -0,0 +1,73 @@
+ TEXT memset(SB),$0
+#define BDNZ BC 16,0,
+ MOVW R3, p+0(FP) /* R3 is pointer */
+
+/*
+ * performance:
+ * about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
+ * drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
+ */
+
+ MOVW n+8(FP), R4 /* R4 is count */
+ CMP R4, $0
+ BLE ret
+ MOVW c+4(FP), R5 /* R5 is char */
+
+/*
+ * create 16 copies of c in R5 .. R8
+ */
+ RLWNM $0, R5, $0xff, R5
+ RLWMI $8, R5, $0xff00, R5
+ RLWMI $16, R5, $0xffff0000, R5
+ MOVW R5, R6
+ MOVW R5, R7
+ MOVW R5, R8
+
+/*
+ * let STSW do the work for 16 characters or less; aligned and unaligned
+ */
+ CMP R4, $16
+ BLE out
+
+/*
+ * store enough bytes to align pointer
+ */
+ ANDCC $7,R3, R9
+ BEQ l2
+ SUBC R9, $8, R9
+ MOVW R9, XER
+ STSW R5, (R3)
+ ADD R9, R3
+ SUB R9, R4
+
+/*
+ * store 16 at a time while there's room
+ * STSW was used here originally, but it's `completion serialised'
+ */
+l2:
+ SRAWCC $4, R4, R9
+ BLE out
+ MOVW R9, CTR
+l3:
+ MOVW R5, 0(R3)
+ ADD $8, R3, R10
+ MOVW R6, 4(R3)
+ MOVW R7, 0(R10)
+ ADD $8, R10, R3
+ MOVW R8, 4(R10)
+ BDNZ l3
+ RLWNMCC $0, R4, $15, R4 /* residue */
+ BEQ ret
+
+/*
+ * store up to 16 bytes from R5 .. R8; aligned and unaligned
+ */
+
+out:
+ MOVW R4, XER
+ STSW R5, (R3)
+
+ret:
+ MOVW 0(FP), R3
+ RETURN
+ END
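
Note: the RLWNM/RLWMI pair above fans the fill byte out to all four byte
lanes of R5 before the word loop. The same replication written in C:

typedef unsigned long ulong;

ulong
replicate(int c)
{
	ulong w;

	w = c & 0xff;	/* 000000cc */
	w |= w<<8;	/* 0000cccc */
	w |= w<<16;	/* cccccccc */
	return w;
}
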
diff --git a/sys/src/libc/power/mkfile b/sys/src/libc/power/mkfile
new file mode 100755
index 000000000..c02f8a98d
--- /dev/null
+++ b/sys/src/libc/power/mkfile
@@ -0,0 +1,37 @@
+objtype=power
+</$objtype/mkfile
+
+LIB=/$objtype/lib/libc.a
+SFILES=\
+ argv0.s\
+ atom.s\
+ cycles.s\
+ getcallerpc.s\
+ getfcr.s\
+ main9.s\
+ main9p.s\
+ memccpy.s\
+ memcmp.s\
+ memmove.s\
+ memset.s\
+ setjmp.s\
+ strcmp.s\
+ strncmp.s\
+ tas.s\
+ vlop.s
+
+CFILES=\
+ notejmp.c\
+ sqrt.c\
+ vlrt.c\
+
+HFILES=/sys/include/libc.h
+
+OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
+
+UPDATE=mkfile\
+ $HFILES\
+ $CFILES\
+ $SFILES\
+
+</sys/src/cmd/mksyslib
diff --git a/sys/src/libc/power/notejmp.c b/sys/src/libc/power/notejmp.c
new file mode 100755
index 000000000..5394a4d75
--- /dev/null
+++ b/sys/src/libc/power/notejmp.c
@@ -0,0 +1,22 @@
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+
+int __noterestore(void);
+
+void
+notejmp(void *vr, jmp_buf j, int ret)
+{
+ struct Ureg *r = vr;
+
+ /*
+ * song and dance to get around the kernel smashing r3 in noted
+ */
+ r->r4 = ret;
+ if(ret == 0)
+ r->r4 = 1;
+ r->r5 = j[JMPBUFPC] - JMPBUFDPC;
+ r->pc = (ulong)__noterestore;
+ r->sp = j[JMPBUFSP];
+ noted(NCONT);
+}
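
Note: notejmp is the note-handler half of a setjmp pair: since the kernel
smashes R3 (the return-value register) in noted(), the target return
value travels in r4 and the jmp_buf PC in r5, and __noterestore moves
them into place after noted(NCONT) resumes user code. A hedged sketch of
typical use from an atnotify handler (the handler and note string here
are illustrative, not from this source):

#include <u.h>
#include <libc.h>
#include <ureg.h>

static jmp_buf jb;	/* set with setjmp(jb) before atnotify(handler, 1) */

static int
handler(void *ureg, char *note)
{
	if(strcmp(note, "alarm") == 0)
		notejmp(ureg, jb, 1);	/* resumes at the setjmp, returning 1 */
	return 0;	/* unrecognized note: let the next handler try */
}
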
diff --git a/sys/src/libc/power/setjmp.s b/sys/src/libc/power/setjmp.s
new file mode 100755
index 000000000..f3f2f44ae
--- /dev/null
+++ b/sys/src/libc/power/setjmp.s
@@ -0,0 +1,26 @@
+TEXT setjmp(SB), 1, $-4
+ MOVW LR, R4
+ MOVW R1, (R3)
+ MOVW R4, 4(R3)
+ MOVW $0, R3
+ RETURN
+
+TEXT longjmp(SB), 1, $-4
+ MOVW R3, R4
+ MOVW r+4(FP), R3
+ CMP R3, $0
+ BNE ok /* ansi: "longjmp(0) => longjmp(1)" */
+ MOVW $1, R3 /* bless their pointed heads */
+ok: MOVW (R4), R1
+ MOVW 4(R4), R4
+ MOVW R4, LR
+ BR (LR)
+
+/*
+ * trampoline functions because the kernel smashes r1
+ * in the uregs given to notejmp
+ */
+TEXT __noterestore(SB), 1, $-4
+ MOVW R4, R3
+ MOVW R5, LR
+ BR (LR)
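
Note: on this port a jmp_buf is just two words, the saved R1 (stack
pointer) and the saved link register; longjmp reloads both and branches.
A small usage sketch showing the zero promotion the comment refers to:

#include <u.h>
#include <libc.h>

static jmp_buf jb;

void
sketch(void)
{
	if(setjmp(jb)){		/* reached twice: 0 first, nonzero after longjmp */
		print("resumed\n");
		return;
	}
	longjmp(jb, 0);		/* the 0 is promoted to 1, per ANSI */
}
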
diff --git a/sys/src/libc/power/sqrt.c b/sys/src/libc/power/sqrt.c
new file mode 100755
index 000000000..fa27c35ef
--- /dev/null
+++ b/sys/src/libc/power/sqrt.c
@@ -0,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+
+static long sqtab[64] =
+{
+ 0x6cdb2, 0x726d4, 0x77ea3, 0x7d52f, 0x82a85, 0x87eb1, 0x8d1c0, 0x923bd,
+ 0x974b2, 0x9c4a8, 0xa13a9, 0xa61be, 0xaaeee, 0xafb41, 0xb46bf, 0xb916e,
+ 0xbdb55, 0xc247a, 0xc6ce3, 0xcb495, 0xcfb95, 0xd41ea, 0xd8796, 0xdcca0,
+ 0xe110c, 0xe54dd, 0xe9818, 0xedac0, 0xf1cd9, 0xf5e67, 0xf9f6e, 0xfdfef,
+ 0x01fe0, 0x05ee6, 0x09cfd, 0x0da30, 0x11687, 0x1520c, 0x18cc8, 0x1c6c1,
+ 0x20000, 0x2388a, 0x27068, 0x2a79e, 0x2de32, 0x3142b, 0x3498c, 0x37e5b,
+ 0x3b29d, 0x3e655, 0x41989, 0x44c3b, 0x47e70, 0x4b02b, 0x4e16f, 0x51241,
+ 0x542a2, 0x57296, 0x5a220, 0x5d142, 0x60000, 0x62e5a, 0x65c55, 0x689f2,
+};
+
+double
+sqrt(double arg)
+{
+ int e, ms;
+ double a, t;
+ union
+ {
+ double d;
+ struct
+ {
+ long ms;
+ long ls;
+ };
+ } u;
+
+ u.d = arg;
+ ms = u.ms;
+
+ /*
+ * sign extend the mantissa with
+ * exponent. result should be > 0 for
+ * normal case.
+ */
+ e = ms >> 20;
+ if(e <= 0) {
+ if(e == 0)
+ return 0;
+ return NaN();
+ }
+
+ /*
+ * pick up arg/4 by adjusting exponent
+ */
+ u.ms = ms - (2 << 20);
+ a = u.d;
+
+ /*
+ * use 5 bits of mantissa and 1 bit
+ * of exponent to form table index.
+ * insert exponent/2 - 1.
+ */
+ e = (((e - 1023) >> 1) + 1022) << 20;
+ u.ms = *(long*)((char*)sqtab + ((ms >> 13) & 0xfc)) | e;
+ u.ls = 0;
+
+ /*
+ * three laps of newton
+ */
+ e = 1 << 20;
+ t = u.d;
+ u.d = t + a/t;
+ u.ms -= e; /* u.d /= 2; */
+ t = u.d;
+ u.d = t + a/t;
+ u.ms -= e; /* u.d /= 2; */
+ t = u.d;
+
+ return t + a/t;
+}
+
+/*
+ * this is the program that generated the table.
+ * it calls sqrt by some other means.
+ *
+ * void
+ * main(void)
+ * {
+ * int i;
+ * union U
+ * {
+ * double d;
+ * struct
+ * {
+ * long ms;
+ * long ls;
+ * };
+ * } u;
+ *
+ * for(i=0; i<64; i++) {
+ * u.ms = (i<<15) | 0x3fe04000;
+ * u.ls = 0;
+ * u.d = sqrt(u.d);
+ * print(" 0x%.5lux,", u.ms & 0xfffff);
+ * }
+ * print("\n");
+ * exits(0);
+ * }
+ */
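
Note: two details above are worth spelling out. The iteration works on
a = arg/4, so the unhalved final step t + a/t equals 2*sqrt(a), which is
exactly sqrt(arg); and each "u.ms -= e" subtracts 1 from the exponent
field, halving without a floating-point divide. The underlying
recurrence, minus those tricks, is plain Newton's method:

/* sketch: Newton refinement of x toward sqrt(a), as the loop above
 * performs it; x0 comes from the 64-entry seed table */
double
newton_sqrt(double a, double x0)
{
	double x;
	int i;

	x = x0;
	for(i = 0; i < 3; i++)
		x = (x + a/x) / 2;	/* each lap about doubles the correct bits */
	return x;
}
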
diff --git a/sys/src/libc/power/strcmp.s b/sys/src/libc/power/strcmp.s
new file mode 100755
index 000000000..0aef5b29c
--- /dev/null
+++ b/sys/src/libc/power/strcmp.s
@@ -0,0 +1,21 @@
+TEXT strcmp(SB), $0
+
+ MOVW s2+4(FP), R4
+
+ SUB $1, R3
+ SUB $1, R4
+l1:
+ MOVBZU 1(R3), R5
+ MOVBZU 1(R4), R6
+ CMP R5, R6
+ BNE ne
+ CMP R5, $0
+ BNE l1
+ MOVW $0, R3
+ RETURN
+ne:
+ MOVW $1, R3
+ BGT ret
+ MOVW $-1, R3
+ret:
+ RETURN
diff --git a/sys/src/libc/power/strncmp.s b/sys/src/libc/power/strncmp.s
new file mode 100755
index 000000000..c55962faa
--- /dev/null
+++ b/sys/src/libc/power/strncmp.s
@@ -0,0 +1,29 @@
+TEXT strncmp(SB), $0
+#define BDNZ BC 16,0,
+
+ MOVW s2+4(FP), R4
+ MOVW n+8(FP), R7
+
+ CMP R7, $0
+ MOVW R7, CTR
+ BLE eq
+
+ SUB $1, R3
+ SUB $1, R4
+l1:
+ MOVBZU 1(R3), R5
+ MOVBZU 1(R4), R6
+ CMP R5, R6
+ BNE ne
+ CMP R5, $0
+ BEQ eq
+ BDNZ l1
+eq:
+ MOVW $0, R3
+ RETURN
+ne:
+ MOVW $1, R3
+ BGT ret
+ MOVW $-1, R3
+ret:
+ RETURN
diff --git a/sys/src/libc/power/tas.s b/sys/src/libc/power/tas.s
new file mode 100755
index 000000000..246b18056
--- /dev/null
+++ b/sys/src/libc/power/tas.s
@@ -0,0 +1,14 @@
+TEXT _tas(SB), 1, $-4
+ MOVW R3, R4
+ MOVW $0xdeaddead,R5
+tas1:
+/* DCBF (R4) fix for 603x bug */
+ SYNC
+ LWAR (R4), R3
+ CMP R3, $0
+ BNE tas0
+ DCBT (R4) /* fix 405 errata cpu_210 */
+ STWCCC R5, (R4)
+ BNE tas1
+tas0:
+ RETURN
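
Note: _tas is the test-and-set under libc's spinlocks: it returns the
previous contents of the lock word, storing the marker 0xdeaddead only
when the word was free, so a zero return means the caller took the lock.
A simplified sketch of how a spinlock uses it (the real lock() in libc
also yields between attempts rather than busy-waiting forever):

typedef struct Lock Lock;
struct Lock
{
	int	val;
};

int	_tas(int*);

void
lock_sketch(Lock *l)
{
	while(_tas(&l->val) != 0)
		;	/* spin until the holder stores 0 back */
}

void
unlock_sketch(Lock *l)
{
	l->val = 0;
}
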
diff --git a/sys/src/libc/power/vlop.s b/sys/src/libc/power/vlop.s
new file mode 100755
index 000000000..9085da247
--- /dev/null
+++ b/sys/src/libc/power/vlop.s
@@ -0,0 +1,132 @@
+#define BDNZ BC 16,0,
+
+/*
+ * 64/64 division adapted from powerpc compiler writer's handbook
+ *
+ * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
+ * quo dvd dvs
+ *
+ * Remainder is left in R7:R8
+ *
+ * Code comment notation:
+ * msw = most-significant (high-order) word, i.e. bits 0..31
+ * lsw = least-significant (low-order) word, i.e. bits 32..63
+ * LZ = Leading Zeroes
+ * SD = Significant Digits
+ *
+ * R3:R4 = dvd (input dividend); quo (output quotient)
+ * R5:R6 = dvs (input divisor)
+ *
+ * R7:R8 = tmp; rem (output remainder)
+ */
+
+TEXT _divu64(SB), $0
+ MOVW a+0(FP), R3
+ MOVW a+4(FP), R4
+ MOVW b+8(FP), R5
+ MOVW b+12(FP), R6
+
+ /* count the number of leading 0s in the dividend */
+ CMP R3, $0 /* dvd.msw == 0? R3, */
+ CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
+ CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
+ BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
+ ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */
+
+lab1:
+ /* count the number of leading 0s in the divisor */
+ CMP R5, $0 /* dvd.msw == 0? */
+ CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
+ CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
+ BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
+ ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */
+
+lab2:
+ /* determine shift amounts to minimize the number of iterations */
+ CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
+ SUBC R11, $64, R10 /* R10 = dvd.SD */
+ BGT lab9 /* if(dvs > dvd) quotient = 0 */
+ ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
+ SUBC R9, $64, R9 /* R9 = dvs.SD */
+ ADD R9, R11 /* (dvd.LZ + dvs.SD) = left shift of dvd for */
+ /* initial dvd */
+ SUB R9, R10, R9 /* (dvd.SD - dvs.SD) = right shift of dvd for */
+ /* initial tmp */
+ MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD */
+
+ /* R7:R8 = R3:R4 >> R9 */
+ CMP R9, $32
+ ADD $-32, R9, R7
+ BLT lab3 /* if(R9 < 32) jump to lab3 */
+ SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
+ MOVW $0, R7 /* tmp.msw = 0 */
+ BR lab4
+lab3:
+ SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
+ SUBC R9, $32, R7
+ SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */
+ OR R7, R8 /* tmp.lsw = R8 | R7 */
+ SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
+
+lab4:
+ /* R3:R4 = R3:R4 << R11 */
+ CMP R11,$32
+ ADDC $-32, R11, R9
+ BLT lab5 /* (R11 < 32)? */
+ SLW R9, R4, R3 /* dvd.msw = dvs.lsw << R9 */
+ MOVW $0, R4 /* dvd.lsw = 0 */
+ BR lab6
+
+lab5:
+ SLW R11, R3 /* R3 = dvd.msw << R11 */
+ SUBC R11, $32, R9
+ SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */
+ OR R9, R3 /* dvd.msw = R3 | R9 */
+ SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
+
+lab6:
+ /* restoring division shift and subtract loop */
+ MOVW $-1, R10
+ ADDC $0, R7 /* clear carry bit before loop starts */
+lab7:
+ /* tmp:dvd is considered one large register */
+ /* each portion is shifted left 1 bit by adding it to itself */
+ /* adde sums the carry from the previous and creates a new carry */
+ ADDE R4,R4 /* shift dvd.lsw left 1 bit */
+ ADDE R3,R3 /* shift dvd.msw to left 1 bit */
+ ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
+ ADDE R7,R7 /* shift tmp.msw to left 1 bit */
+ SUBC R6, R8, R11 /* tmp.lsw - dvs.lsw */
+ SUBECC R5, R7, R9 /* tmp.msw - dvs.msw */
+ BLT lab8 /* if(result < 0) clear carry bit */
+ MOVW R11, R8 /* move lsw */
+ MOVW R9, R7 /* move msw */
+ ADDC $1, R10, R11 /* set carry bit */
+lab8:
+ BDNZ lab7
+
+ ADDE R4,R4 /* quo.lsw (lsb = CA) */
+ ADDE R3,R3 /* quo.msw (lsb from lsw) */
+
+lab10:
+ MOVW qp+16(FP), R9
+ MOVW rp+20(FP), R10
+ CMP R9, $0
+ BEQ lab11
+ MOVW R3, 0(R9)
+ MOVW R4, 4(R9)
+lab11:
+ CMP R10, $0
+ BEQ lab12
+ MOVW R7, 0(R10)
+ MOVW R8, 4(R10)
+lab12:
+ RETURN
+
+lab9:
+ /* Quotient is 0 (dvs > dvd) */
+ MOVW R4, R8 /* rmd.lsw = dvd.lsw */
+ MOVW R3, R7 /* rmd.msw = dvd.msw */
+ MOVW $0, R4 /* dvd.lsw = 0 */
+ MOVW $0, R3 /* dvd.msw = 0 */
+ BR lab10
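
Note: the loop at lab7 is restoring shift-and-subtract division: tmp:dvd
acts as one 128-bit register shifted left through the carry chain, and
each lap either keeps or replaces tmp, feeding one quotient bit back in
via the carry. A C sketch of the same core, minus the CNTLZW
normalization that limits the lap count to dvd.SD - dvs.SD (the 64-bit
arithmetic here is for exposition only; this routine exists precisely
because the compiler lowers such operations to calls like this one):

typedef unsigned long long uvlong;

uvlong
divu64_sketch(uvlong dvd, uvlong dvs, uvlong *rem)
{
	uvlong quo, tmp;
	int i;

	quo = 0;
	tmp = 0;	/* running remainder */
	for(i = 0; i < 64; i++){
		tmp = (tmp<<1) | (dvd>>63);	/* shift tmp:dvd left one bit */
		dvd <<= 1;
		quo <<= 1;
		if(tmp >= dvs){		/* trial subtraction succeeds */
			tmp -= dvs;
			quo |= 1;	/* quotient bit is 1 */
		}
	}
	if(rem)
		*rem = tmp;
	return quo;
}
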
diff --git a/sys/src/libc/power/vlrt.c b/sys/src/libc/power/vlrt.c
new file mode 100755
index 000000000..681a3b49b
--- /dev/null
+++ b/sys/src/libc/power/vlrt.c
@@ -0,0 +1,254 @@
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef signed char schar;
+
+#define SIGN(n) (1UL<<(n-1))
+
+typedef struct Vlong Vlong;
+struct Vlong
+{
+ ulong hi;
+ ulong lo;
+};
+
+void abort(void);
+void _divu64(Vlong, Vlong, Vlong*, Vlong*);
+
+void
+_d2v(Vlong *y, double d)
+{
+ union { double d; Vlong; } x;
+ ulong xhi, xlo, ylo, yhi;
+ int sh;
+
+ x.d = d;
+
+ xhi = (x.hi & 0xfffff) | 0x100000;
+ xlo = x.lo;
+ sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+ ylo = 0;
+ yhi = 0;
+ if(sh >= 0) {
+ /* v = (hi||lo) >> sh */
+ if(sh < 32) {
+ if(sh == 0) {
+ ylo = xlo;
+ yhi = xhi;
+ } else {
+ ylo = (xlo >> sh) | (xhi << (32-sh));
+ yhi = xhi >> sh;
+ }
+ } else {
+ if(sh == 32) {
+ ylo = xhi;
+ } else
+ if(sh < 64) {
+ ylo = xhi >> (sh-32);
+ }
+ }
+ } else {
+ /* v = (hi||lo) << -sh */
+ sh = -sh;
+ if(sh <= 10) {
+ ylo = xlo << sh;
+ yhi = (xhi << sh) | (xlo >> (32-sh));
+ } else {
+ /* overflow */
+ yhi = d; /* causes something awful */
+ }
+ }
+ if(x.hi & SIGN(32)) {
+ if(ylo != 0) {
+ ylo = -ylo;
+ yhi = ~yhi;
+ } else
+ yhi = -yhi;
+ }
+
+ y->hi = yhi;
+ y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+
+ _d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+ if(x.hi & SIGN(32)) {
+ if(x.lo) {
+ x.lo = -x.lo;
+ x.hi = ~x.hi;
+ } else
+ x.hi = -x.hi;
+ return -((long)x.hi*4294967296. + x.lo);
+ }
+ return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+ return _v2d(x);
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+ if(n.hi == 0 && d.hi == 0) {
+ q->hi = 0;
+ q->lo = n.lo / d.lo;
+ return;
+ }
+ _divu64(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+ if(n.hi == 0 && d.hi == 0) {
+ r->hi = 0;
+ r->lo = n.lo % d.lo;
+ return;
+ }
+ _divu64(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+ if(v->lo == 0) {
+ v->hi = -v->hi;
+ return;
+ }
+ v->lo = -v->lo;
+ v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ q->lo = (long)n.lo / (long)d.lo;
+ q->hi = ((long)q->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ _divu64(n, d, q, 0);
+ if(nneg != dneg)
+ vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ r->lo = (long)n.lo % (long)d.lo;
+ r->hi = ((long)r->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ _divu64(n, d, 0, r);
+ if(nneg)
+ vneg(r);
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+ Vlong t, u;
+
+ u = *ret;
+ switch(type) {
+ default:
+ abort();
+ break;
+
+ case 1: /* schar */
+ t.lo = *(schar*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(schar*)lv = u.lo;
+ break;
+
+ case 2: /* uchar */
+ t.lo = *(uchar*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uchar*)lv = u.lo;
+ break;
+
+ case 3: /* short */
+ t.lo = *(short*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(short*)lv = u.lo;
+ break;
+
+ case 4: /* ushort */
+ t.lo = *(ushort*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ushort*)lv = u.lo;
+ break;
+
+ case 9: /* int */
+ t.lo = *(int*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(int*)lv = u.lo;
+ break;
+
+ case 10: /* uint */
+ t.lo = *(uint*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uint*)lv = u.lo;
+ break;
+
+ case 5: /* long */
+ t.lo = *(long*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(long*)lv = u.lo;
+ break;
+
+ case 6: /* ulong */
+ t.lo = *(ulong*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ulong*)lv = u.lo;
+ break;
+
+ case 7: /* vlong */
+ case 8: /* uvlong */
+ fn(&u, *(Vlong*)lv, rv);
+ *(Vlong*)lv = u;
+ break;
+ }
+ *ret = u;
+}
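
Note on the arithmetic in _d2v above: an IEEE double keeps 52 mantissa
bits below an implicit 1, so after or-ing in 0x100000 the 53-bit integer
form of the value is (hi||lo) * 2^(exp - 1075), where 1075 is the
exponent bias 1023 plus 52. A sketch of the core for the common in-range
positive case, assuming standard IEEE-754 layout and using a 64-bit
shift for clarity:

typedef unsigned long ulong;
typedef unsigned long long uvlong;

uvlong
d2v_sketch(ulong hi, ulong lo)
{
	uvlong m;
	int sh;

	m = ((uvlong)((hi & 0xfffff) | 0x100000)<<32) | lo;
	sh = 1075 - ((hi>>20) & 0x7ff);	/* 1075 = bias 1023 + 52 bits */
	return m >> sh;			/* e.g. d = 6.0: sh == 50, yields 6 */
}
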