path: root/sys/src/ape/lib/ap/arm
author:    Taru Karttunen <taruti@taruti.net>  2011-03-30 15:46:40 +0300
committer: Taru Karttunen <taruti@taruti.net>  2011-03-30 15:46:40 +0300
commit:    e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree:      d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/ap/arm
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/arm')
-rwxr-xr-x  sys/src/ape/lib/ap/arm/cycles.c        5
-rwxr-xr-x  sys/src/ape/lib/ap/arm/div.s         118
-rwxr-xr-x  sys/src/ape/lib/ap/arm/getfcr.s       16
-rwxr-xr-x  sys/src/ape/lib/ap/arm/lock.c         26
-rwxr-xr-x  sys/src/ape/lib/ap/arm/main9.s        17
-rwxr-xr-x  sys/src/ape/lib/ap/arm/main9p.s       52
-rwxr-xr-x  sys/src/ape/lib/ap/arm/memmove.s     212
-rwxr-xr-x  sys/src/ape/lib/ap/arm/memset.s       60
-rwxr-xr-x  sys/src/ape/lib/ap/arm/mkfile         25
-rwxr-xr-x  sys/src/ape/lib/ap/arm/notetramp.c    72
-rwxr-xr-x  sys/src/ape/lib/ap/arm/setjmp.s       29
-rwxr-xr-x  sys/src/ape/lib/ap/arm/strchr.s       56
-rwxr-xr-x  sys/src/ape/lib/ap/arm/strcmp.s       67
-rwxr-xr-x  sys/src/ape/lib/ap/arm/strcpy.s       46
-rwxr-xr-x  sys/src/ape/lib/ap/arm/tas.s           5
-rwxr-xr-x  sys/src/ape/lib/ap/arm/vlop.s         13
-rwxr-xr-x  sys/src/ape/lib/ap/arm/vlrt.c        708
17 files changed, 1527 insertions, 0 deletions
diff --git a/sys/src/ape/lib/ap/arm/cycles.c b/sys/src/ape/lib/ap/arm/cycles.c
new file mode 100755
index 000000000..1c32bc732
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/cycles.c
@@ -0,0 +1,5 @@
+void
+_cycles(unsigned long long *u)
+{
+ *u = 0;
+}
diff --git a/sys/src/ape/lib/ap/arm/div.s b/sys/src/ape/lib/ap/arm/div.s
new file mode 100755
index 000000000..2f7699c50
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/div.s
@@ -0,0 +1,118 @@
+Q = 0
+N = 1
+D = 2
+CC = 3
+TMP = 11
+
+TEXT save<>(SB), 1, $0
+ MOVW R(Q), 0(FP)
+ MOVW R(N), 4(FP)
+ MOVW R(D), 8(FP)
+ MOVW R(CC), 12(FP)
+
+ MOVW R(TMP), R(Q) /* numerator */
+ MOVW 20(FP), R(D) /* denominator */
+ CMP $0, R(D)
+ BNE s1
+ MOVW -1(R(D)), R(TMP) /* divide by zero fault */
+s1: RET
+
+TEXT rest<>(SB), 1, $0
+ MOVW 0(FP), R(Q)
+ MOVW 4(FP), R(N)
+ MOVW 8(FP), R(D)
+ MOVW 12(FP), R(CC)
+/*
+ * return to caller
+ * of rest<>
+ */
+ MOVW 0(R13), R14
+ ADD $20, R13
+ B (R14)
+
+TEXT div<>(SB), 1, $0
+ MOVW $32, R(CC)
+/*
+ * skip zeros 8-at-a-time
+ */
+e1:
+ AND.S $(0xff<<24),R(Q), R(N)
+ BNE e2
+ SLL $8, R(Q)
+ SUB.S $8, R(CC)
+ BNE e1
+ RET
+e2:
+ MOVW $0, R(N)
+
+loop:
+/*
+ * shift R(N||Q) left one
+ */
+ SLL $1, R(N)
+ CMP $0, R(Q)
+ ORR.LT $1, R(N)
+ SLL $1, R(Q)
+
+/*
+ * compare the remainder to the denominator;
+ * if it is no smaller, subtract and set the quotient bit
+ */
+ CMP R(D), R(N)
+ ORR.HS $1, R(Q)
+ SUB.HS R(D), R(N)
+ SUB.S $1, R(CC)
+ BNE loop
+ RET
+
+TEXT _div(SB), 1, $16
+ BL save<>(SB)
+ CMP $0, R(Q)
+ BGE d1
+ RSB $0, R(Q), R(Q)
+ CMP $0, R(D)
+ BGE d2
+ RSB $0, R(D), R(D)
+d0:
+ BL div<>(SB) /* none/both neg */
+ MOVW R(Q), R(TMP)
+ B out
+d1:
+ CMP $0, R(D)
+ BGE d0
+ RSB $0, R(D), R(D)
+d2:
+ BL div<>(SB) /* one neg */
+ RSB $0, R(Q), R(TMP)
+ B out
+
+TEXT _mod(SB), 1, $16
+ BL save<>(SB)
+ CMP $0, R(D)
+ RSB.LT $0, R(D), R(D)
+ CMP $0, R(Q)
+ BGE m1
+ RSB $0, R(Q), R(Q)
+ BL div<>(SB) /* neg numerator */
+ RSB $0, R(N), R(TMP)
+ B out
+m1:
+ BL div<>(SB) /* pos numerator */
+ MOVW R(N), R(TMP)
+ B out
+
+TEXT _divu(SB), 1, $16
+ BL save<>(SB)
+ BL div<>(SB)
+ MOVW R(Q), R(TMP)
+ B out
+
+TEXT _modu(SB), 1, $16
+ BL save<>(SB)
+ BL div<>(SB)
+ MOVW R(N), R(TMP)
+ B out
+
+out:
+ BL rest<>(SB)
+ B out
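
For reference, a rough C model of the restoring shift-subtract loop in
div<> above, simplified to run all 32 iterations (the assembly first
skips leading zero bytes eight bits at a time). Names follow the
register assignments; a zero divisor faults in save<> rather than here.

	/* Simplified C model of div<>: q enters as the numerator and
	 * leaves as the quotient; n accumulates the remainder as R(N). */
	unsigned
	divu_model(unsigned q, unsigned d, unsigned *rem)
	{
		unsigned n = 0;
		int cc;

		for(cc = 32; cc > 0; cc--){
			n = (n << 1) | (q >> 31);	/* shift N||Q left one */
			q <<= 1;
			if(n >= d){	/* CMP R(D), R(N); ORR.HS; SUB.HS */
				n -= d;
				q |= 1;
			}
		}
		if(rem)
			*rem = n;
		return q;
	}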
diff --git a/sys/src/ape/lib/ap/arm/getfcr.s b/sys/src/ape/lib/ap/arm/getfcr.s
new file mode 100755
index 000000000..b27c7f082
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/getfcr.s
@@ -0,0 +1,16 @@
+
+TEXT setfcr(SB), $4
+ MOVW R0, FPCR
+ RET
+
+TEXT getfcr(SB), $4
+ MOVW FPCR, R0
+ RET
+
+TEXT getfsr(SB), $0
+ MOVW FPSR, R0
+ RET
+
+TEXT setfsr(SB), $0
+ MOVW R0, FPSR
+ RET
diff --git a/sys/src/ape/lib/ap/arm/lock.c b/sys/src/ape/lib/ap/arm/lock.c
new file mode 100755
index 000000000..91c0ba233
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/lock.c
@@ -0,0 +1,26 @@
+#define _LOCK_EXTENSION
+#include "../plan9/sys9.h"
+#include <lock.h>
+
+int tas(int*);
+
+void
+lock(Lock *lk)
+{
+ while(tas(&lk->val))
+ _SLEEP(0);
+}
+
+int
+canlock(Lock *lk)
+{
+ if(tas(&lk->val))
+ return 0;
+ return 1;
+}
+
+void
+unlock(Lock *lk)
+{
+ lk->val = 0;
+}
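
A minimal usage sketch of these primitives (the counter example is
hypothetical, not part of the library):

	#include <lock.h>

	static Lock counterlock;
	static long counter;

	long
	bump(void)
	{
		long v;

		lock(&counterlock);	/* spins on tas, yielding via _SLEEP(0) */
		v = ++counter;
		unlock(&counterlock);
		return v;
	}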
diff --git a/sys/src/ape/lib/ap/arm/main9.s b/sys/src/ape/lib/ap/arm/main9.s
new file mode 100755
index 000000000..cd04bea17
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/main9.s
@@ -0,0 +1,17 @@
+arg=0
+sp=13
+sb=12
+
+TEXT _main(SB), 1, $16
+ MOVW $setR12(SB), R(sb)
+ BL _envsetup(SB)
+ MOVW $inargv+0(FP), R(arg)
+ MOVW R(arg), 8(R(sp))
+ MOVW inargc-4(FP), R(arg)
+ MOVW R(arg), 4(R(sp))
+ BL main(SB)
+loop:
+ MOVW R(arg), 4(R(sp))
+ BL exit(SB)
+ BL _div(SB) /* force loading of div */
+ B loop
diff --git a/sys/src/ape/lib/ap/arm/main9p.s b/sys/src/ape/lib/ap/arm/main9p.s
new file mode 100755
index 000000000..f5dc65344
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/main9p.s
@@ -0,0 +1,52 @@
+arg=0
+sp=13
+sb=12
+
+#define NPRIVATES 16
+
+GLOBL _tos(SB), $4
+GLOBL _privates(SB), $4
+GLOBL _nprivates(SB), $4
+
+TEXT _mainp(SB), 1, $(3*4+NPRIVATES*4)
+
+ MOVW $setR12(SB), R(sb)
+
+ /* _tos = arg */
+ MOVW R(arg), _tos(SB)
+ MOVW $private+8(SP), R1
+ MOVW R1, _privates(SB)
+ MOVW $NPRIVATES, R1
+ MOVW R1, _nprivates(SB)
+
+ /* _profmain(); */
+ BL _profmain(SB)
+
+ /* _tos->prof.pp = _tos->prof.next; */
+ MOVW _tos+0(SB),R1
+ MOVW 4(R1), R2
+ MOVW R2, 0(R1)
+
+ BL _envsetup(SB)
+
+ /* main(argc, argv, environ); */
+ MOVW $inargv+0(FP), R(arg)
+ MOVW R(arg), 8(R(sp))
+ MOVW inargc-4(FP), R(arg)
+ MOVW R(arg), 4(R(sp))
+ MOVW environ(SB), R(arg)
+ MOVW R(arg), 12(R(sp))
+ BL main(SB)
+loop:
+ MOVW R(arg), 4(R(sp))
+ BL exit(SB)
+ MOVW $_div(SB), R(arg) /* force loading of div */
+ MOVW $_profin(SB), R(arg) /* force loading of profile */
+ B loop
+
+TEXT _savearg(SB), 1, $0
+ RET
+
+TEXT _callpc(SB), 1, $0
+ MOVW argp-4(FP), R(arg)
+ RET
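
In rough C terms, _mainp does the following; the declarations here are
assumptions for illustration, and the profiling-pointer store and exact
stack layout are elided:

	extern void **_privates;
	extern int _nprivates;
	extern char **environ;
	extern void _envsetup(void), _profmain(void);
	extern int main(int, char**, char**);
	extern void exit(int);

	enum { NPRIVATES = 16 };
	static void *privspace[NPRIVATES];	/* carved from the frame in the assembly */

	void
	mainp_model(int inargc, char **inargv)
	{
		_privates = privspace;
		_nprivates = NPRIVATES;
		_profmain();	/* then _tos->prof.pp = _tos->prof.next */
		_envsetup();
		exit(main(inargc, inargv, environ));
	}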
diff --git a/sys/src/ape/lib/ap/arm/memmove.s b/sys/src/ape/lib/ap/arm/memmove.s
new file mode 100755
index 000000000..346a23d72
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/memmove.s
@@ -0,0 +1,212 @@
+TS = 0
+TE = 1
+FROM = 2
+N = 3
+TMP = 3 /* N and TMP share a register; their uses don't overlap */
+TMP1 = 4
+
+TEXT memcpy(SB), $-4
+ B _memmove
+TEXT memmove(SB), $-4
+_memmove:
+ MOVW R(TS), to+0(FP) /* need to save for return value */
+ MOVW from+4(FP), R(FROM)
+ MOVW n+8(FP), R(N)
+
+ ADD R(N), R(TS), R(TE) /* to end pointer */
+
+ CMP R(FROM), R(TS)
+ BLS _forward
+
+_back:
+ ADD R(N), R(FROM) /* from end pointer */
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _b1tail
+
+_b4align: /* align destination on 4 */
+ AND.S $3, R(TE), R(TMP)
+ BEQ _b4aligned
+
+ MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */
+ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */
+ B _b4align
+
+_b4aligned: /* is source now aligned? */
+ AND.S $3, R(FROM), R(TMP)
+ BNE _bunaligned
+
+ ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */
+_b32loop:
+ CMP R(TMP), R(TE)
+ BLS _b4tail
+
+ MOVM.DB.W (R(FROM)), [R4-R7]
+ MOVM.DB.W [R4-R7], (R(TE))
+ MOVM.DB.W (R(FROM)), [R4-R7]
+ MOVM.DB.W [R4-R7], (R(TE))
+ B _b32loop
+
+_b4tail: /* do remaining words if possible */
+ ADD $3, R(TS), R(TMP)
+_b4loop:
+ CMP R(TMP), R(TE)
+ BLS _b1tail
+
+ MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */
+ MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */
+ B _b4loop
+
+_b1tail: /* remaining bytes */
+ CMP R(TE), R(TS)
+ BEQ _return
+
+ MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */
+ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */
+ B _b1tail
+
+_forward:
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _f1tail
+
+_f4align: /* align destination on 4 */
+ AND.S $3, R(TS), R(TMP)
+ BEQ _f4aligned
+
+ MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */
+ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */
+ B _f4align
+
+_f4aligned: /* is source now aligned? */
+ AND.S $3, R(FROM), R(TMP)
+ BNE _funaligned
+
+ SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */
+_f32loop:
+ CMP R(TMP), R(TS)
+ BHS _f4tail
+
+ MOVM.IA.W (R(FROM)), [R4-R7]
+ MOVM.IA.W [R4-R7], (R(TS))
+ MOVM.IA.W (R(FROM)), [R4-R7]
+ MOVM.IA.W [R4-R7], (R(TS))
+ B _f32loop
+
+_f4tail:
+ SUB $3, R(TE), R(TMP) /* do remaining words if possible */
+_f4loop:
+ CMP R(TMP), R(TS)
+ BHS _f1tail
+
+ MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */
+ MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */
+ B _f4loop
+
+_f1tail:
+ CMP R(TS), R(TE)
+ BEQ _return
+
+ MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */
+ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */
+ B _f1tail
+
+_return:
+ MOVW to+0(FP), R0
+ RET
+
+RSHIFT = 4
+LSHIFT = 5
+OFFSET = 11
+
+BR0 = 6
+BW0 = 7
+BR1 = 7
+BW1 = 8
+
+_bunaligned:
+ CMP $2, R(TMP) /* is R(TMP) < 2 ? */
+
+ MOVW.LT $8, R(RSHIFT) /* (R(n)<<24)|(R(n-1)>>8) */
+ MOVW.LT $24, R(LSHIFT)
+ MOVW.LT $1, R(OFFSET)
+
+ MOVW.EQ $16, R(RSHIFT) /* (R(n)<<16)|(R(n-1)>>16) */
+ MOVW.EQ $16, R(LSHIFT)
+ MOVW.EQ $2, R(OFFSET)
+
+ MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */
+ MOVW.GT $8, R(LSHIFT)
+ MOVW.GT $3, R(OFFSET)
+
+ ADD $8, R(TS), R(TMP) /* do 8-byte chunks if possible */
+ CMP R(TMP), R(TE)
+ BLS _b1tail
+
+ BIC $3, R(FROM) /* align source */
+ MOVW (R(FROM)), R(BR0) /* prime first block register */
+
+_bu8loop:
+ CMP R(TMP), R(TE)
+ BLS _bu1tail
+
+ MOVW R(BR0)<<R(LSHIFT), R(BW1)
+ MOVM.DB.W (R(FROM)), [R(BR0)-R(BR1)]
+ ORR R(BR1)>>R(RSHIFT), R(BW1)
+
+ MOVW R(BR1)<<R(LSHIFT), R(BW0)
+ ORR R(BR0)>>R(RSHIFT), R(BW0)
+
+ MOVM.DB.W [R(BW0)-R(BW1)], (R(TE))
+ B _bu8loop
+
+_bu1tail:
+ ADD R(OFFSET), R(FROM)
+ B _b1tail
+
+RSHIFT = 4
+LSHIFT = 5
+OFFSET = 11
+
+FW0 = 6
+FR0 = 7
+FW1 = 7
+FR1 = 8
+
+_funaligned:
+ CMP $2, R(TMP)
+
+ MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */
+ MOVW.LT $24, R(LSHIFT)
+ MOVW.LT $3, R(OFFSET)
+
+ MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */
+ MOVW.EQ $16, R(LSHIFT)
+ MOVW.EQ $2, R(OFFSET)
+
+ MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */
+ MOVW.GT $8, R(LSHIFT)
+ MOVW.GT $1, R(OFFSET)
+
+ SUB $8, R(TE), R(TMP) /* do 8-byte chunks if possible */
+ CMP R(TMP), R(TS)
+ BHS _f1tail
+
+ BIC $3, R(FROM) /* align source */
+ MOVW.P 4(R(FROM)), R(FR1) /* prime last block register, implicit write back */
+
+_fu8loop:
+ CMP R(TMP), R(TS)
+ BHS _fu1tail
+
+ MOVW R(FR1)>>R(RSHIFT), R(FW0)
+ MOVM.IA.W (R(FROM)), [R(FR0)-R(FR1)]
+ ORR R(FR0)<<R(LSHIFT), R(FW0)
+
+ MOVW R(FR0)>>R(RSHIFT), R(FW1)
+ ORR R(FR1)<<R(LSHIFT), R(FW1)
+
+ MOVM.IA.W [R(FW0)-R(FW1)], (R(TS))
+ B _fu8loop
+
+_fu1tail:
+ SUB R(OFFSET), R(FROM)
+ B _f1tail
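
The _bunaligned/_funaligned paths round the source down to a word
boundary and merge successive aligned words with shifts. A simplified
little-endian C model of the forward case, assuming the source offset
off = from & 3 is nonzero (the aligned case takes the _f32loop path):

	#include <stdint.h>

	void
	copyfwd_unaligned(uint32_t *to, const unsigned char *from, int nwords)
	{
		unsigned off = (uintptr_t)from & 3;
		unsigned rshift = 8*off, lshift = 32 - 8*off;
		const uint32_t *f = (const uint32_t*)((uintptr_t)from - off);
		uint32_t prev = *f++, next;	/* prime first block register */

		while(nwords-- > 0){
			next = *f++;	/* MOVM.IA.W (R(FROM)), [...] */
			*to++ = (prev >> rshift) | (next << lshift);
			prev = next;
		}
	}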
diff --git a/sys/src/ape/lib/ap/arm/memset.s b/sys/src/ape/lib/ap/arm/memset.s
new file mode 100755
index 000000000..7ebbb44c3
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/memset.s
@@ -0,0 +1,60 @@
+TO = 1
+TOE = 2
+N = 3
+TMP = 3 /* N and TMP share a register; their uses don't overlap */
+
+TEXT memset(SB), $0
+ MOVW R0, R(TO)
+ MOVW data+4(FP), R(4)
+ MOVW n+8(FP), R(N)
+
+ ADD R(N), R(TO), R(TOE) /* to end pointer */
+
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _1tail
+
+ AND $0xFF, R(4)
+ ORR R(4)<<8, R(4)
+ ORR R(4)<<16, R(4) /* replicate to word */
+
+_4align: /* align on 4 */
+ AND.S $3, R(TO), R(TMP)
+ BEQ _4aligned
+
+ MOVBU.P R(4), 1(R(TO)) /* implicit write back */
+ B _4align
+
+_4aligned:
+ SUB $15, R(TOE), R(TMP) /* do 16-byte chunks if possible */
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVW R4, R5 /* replicate */
+ MOVW R4, R6
+ MOVW R4, R7
+
+_f16loop:
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVM.IA.W [R4-R7], (R(TO))
+ B _f16loop
+
+_4tail:
+ SUB $3, R(TOE), R(TMP) /* do remaining words if possible */
+_4loop:
+ CMP R(TMP), R(TO)
+ BHS _1tail
+
+ MOVW.P R(4), 4(R(TO)) /* implicit write back */
+ B _4loop
+
+_1tail:
+ CMP R(TO), R(TOE)
+ BEQ _return
+
+ MOVBU.P R(4), 1(R(TO)) /* implicit write back */
+ B _1tail
+
+_return:
+ RET
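
The word fill depends on spreading the byte value across a word first;
in C the replication step is:

	/* C model of the ORR chain above. */
	unsigned
	replicate(unsigned c)
	{
		c &= 0xFF;
		c |= c << 8;	/* ORR R(4)<<8, R(4) */
		c |= c << 16;	/* ORR R(4)<<16, R(4): replicate to word */
		return c;
	}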
diff --git a/sys/src/ape/lib/ap/arm/mkfile b/sys/src/ape/lib/ap/arm/mkfile
new file mode 100755
index 000000000..f65e34395
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/mkfile
@@ -0,0 +1,25 @@
+APE=/sys/src/ape
+<$APE/config
+LIB=/$objtype/lib/ape/libap.a
+OFILES=\
+ cycles.$O\
+ div.$O\
+ getfcr.$O\
+ lock.$O\
+ main9.$O\
+ main9p.$O\
+ memmove.$O\
+ memset.$O\
+ notetramp.$O\
+ setjmp.$O\
+ strchr.$O\
+ strcmp.$O\
+ strcpy.$O\
+ tas.$O\
+ vlop.$O\
+ vlrt.$O\
+
+</sys/src/cmd/mksyslib
+
+CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE
+
diff --git a/sys/src/ape/lib/ap/arm/notetramp.c b/sys/src/ape/lib/ap/arm/notetramp.c
new file mode 100755
index 000000000..6b1be6641
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/notetramp.c
@@ -0,0 +1,72 @@
+#include "../plan9/lib.h"
+#include "../plan9/sys9.h"
+#include <signal.h>
+#include <setjmp.h>
+
+/* A stack to hold pcs when signals nest */
+#define MAXSIGSTACK 20
+typedef struct Pcstack Pcstack;
+static struct Pcstack {
+ int sig;
+ void (*hdlr)(int, char*, Ureg*);
+ unsigned long restorepc;
+ Ureg *u;
+} pcstack[MAXSIGSTACK];
+static int nstack = 0;
+
+static void notecont(Ureg*, char*);
+
+void
+_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u)
+{
+ Pcstack *p;
+
+ if(nstack >= MAXSIGSTACK)
+ _NOTED(1); /* nesting too deep; just do system default */
+ p = &pcstack[nstack];
+ p->restorepc = u->pc;
+ p->sig = sig;
+ p->hdlr = hdlr;
+ p->u = u;
+ nstack++;
+ u->pc = (unsigned long) notecont;
+ _NOTED(2); /* NSAVE: clear note but hold state */
+}
+
+static void
+notecont(Ureg *u, char *s)
+{
+ Pcstack *p;
+ void(*f)(int, char*, Ureg*);
+
+ p = &pcstack[nstack-1];
+ f = p->hdlr;
+ u->pc = p->restorepc;
+ nstack--;
+ (*f)(p->sig, s, u);
+ _NOTED(3); /* NRSTR */
+}
+
+#define JMPBUFPC 1
+#define JMPBUFSP 0
+
+extern sigset_t _psigblocked;
+
+void
+siglongjmp(sigjmp_buf j, int ret)
+{
+ struct Ureg *u;
+
+ if(j[0])
+ _psigblocked = j[1];
+ if(nstack == 0 || pcstack[nstack-1].u->sp > j[2+JMPBUFSP])
+ longjmp(j+2, ret);
+ u = pcstack[nstack-1].u;
+ nstack--;
+ u->r0 = ret;
+ if(ret == 0)
+ u->r0 = 1;
+ u->pc = j[2+JMPBUFPC];
+ u->sp = j[2+JMPBUFSP];
+ _NOTED(3); /* NRSTR */
+}
diff --git a/sys/src/ape/lib/ap/arm/setjmp.s b/sys/src/ape/lib/ap/arm/setjmp.s
new file mode 100755
index 000000000..62a06245c
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/setjmp.s
@@ -0,0 +1,29 @@
+arg=0
+link=14
+sp=13
+
+TEXT setjmp(SB), 1, $-4
+ MOVW R(sp), (R(arg+0))
+ MOVW R(link), 4(R(arg+0))
+ MOVW $0, R0
+ RET
+
+TEXT sigsetjmp(SB), 1, $-4
+ MOVW savemask+4(FP), R(arg+2)
+ MOVW R(arg+2), 0(R(arg+0))
+ MOVW _psigblocked(SB), R(arg+2) /* current signal mask, restored by siglongjmp */
+ MOVW R(arg+2), 4(R(arg+0))
+ MOVW R(sp), 8(R(arg+0))
+ MOVW R(link), 12(R(arg+0))
+ MOVW $0, R(arg+0)
+ RET
+
+TEXT longjmp(SB), 1, $-4
+ MOVW r+4(FP), R(arg+2)
+ CMP $0, R(arg+2)
+ BNE ok /* ansi: "longjmp(0) => longjmp(1)" */
+ MOVW $1, R(arg+2) /* bless their pointed heads */
+ok: MOVW (R(arg+0)), R(sp)
+ MOVW 4(R(arg+0)), R(link)
+ MOVW R(arg+2), R(arg+0)
+ RET
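
The sigjmp_buf layout implied by sigsetjmp here, together with
JMPBUFSP = 0, JMPBUFPC = 1 and the j+2 offset in notetramp.c, can be
modeled as (a sketch, not an APE header definition):

	typedef struct {
		long	savemask;	/* j[0]: nonzero if the mask was saved */
		long	mask;		/* j[1]: saved _psigblocked */
		long	sp;		/* j[2+JMPBUFSP] */
		long	pc;		/* j[2+JMPBUFPC] */
	} SigjmpModel;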
diff --git a/sys/src/ape/lib/ap/arm/strchr.s b/sys/src/ape/lib/ap/arm/strchr.s
new file mode 100755
index 000000000..349b5a49f
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/strchr.s
@@ -0,0 +1,56 @@
+TEXT strchr(SB), $-4
+ MOVBU c+4(FP), R1
+ CMP $0, R1
+ BEQ _null
+
+_strchr: /* not looking for a null, byte at a time */
+ MOVBU.P 1(R0), R2
+ CMP R1, R2
+ BEQ _sub1
+
+ CMP $0, R2
+ BNE _strchr
+
+_return0: /* character not found in string, return 0 */
+ MOVW $0, R0
+ RET
+
+_null: /* looking for null, align */
+ AND.S $3, R0, R2
+ BEQ _aligned
+
+ MOVBU.P 1(R0), R4
+ CMP $0, R4
+ BEQ _sub1
+ B _null
+
+_aligned:
+ MOVW $0xFF, R3 /* mask */
+
+_loop:
+ MOVW.P 4(R0), R4 /* 4 at a time */
+ TST R4, R3 /* AND.S R2, R3, Rx */
+ TST.NE R4>>8, R3
+ TST.NE R4>>16, R3
+ TST.NE R4>>24, R3
+ BNE _loop
+
+ TST R4, R3 /* it's somewhere, find it and correct */
+ BEQ _sub4
+ TST R4>>8, R3
+ BEQ _sub3
+ TST R4>>16, R3
+ BEQ _sub2
+
+_sub1: /* compensate for pointer increment */
+ SUB $1, R0
+ RET
+_sub2:
+ SUB $2, R0
+ RET
+_sub3:
+ SUB $3, R0
+ RET
+_sub4:
+ SUB $4, R0
+ RET
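
The aligned scan tests each byte lane of a word against the 0xFF mask,
then backs up the post-incremented pointer. A little-endian C model:

	char*
	findnul(const unsigned *p)	/* p must be word-aligned */
	{
		unsigned w;

		for(;;){
			w = *p++;	/* MOVW.P 4(R0), R4 */
			if((w & 0xFF) == 0)
				return (char*)p - 4;	/* _sub4 */
			if(((w >> 8) & 0xFF) == 0)
				return (char*)p - 3;	/* _sub3 */
			if(((w >> 16) & 0xFF) == 0)
				return (char*)p - 2;	/* _sub2 */
			if(((w >> 24) & 0xFF) == 0)
				return (char*)p - 1;	/* _sub1 */
		}
	}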
diff --git a/sys/src/ape/lib/ap/arm/strcmp.s b/sys/src/ape/lib/ap/arm/strcmp.s
new file mode 100755
index 000000000..015e51596
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/strcmp.s
@@ -0,0 +1,67 @@
+TEXT strcmp(SB), $-4
+ MOVW R0, R1
+ MOVW s2+4(FP), R2
+
+ MOVW $0xFF, R3 /* mask */
+
+_align: /* align s1 on 4 */
+ TST $3, R1
+ BEQ _aligned
+
+ MOVBU.P 1(R1), R4 /* implicit write back */
+ MOVBU.P 1(R2), R8 /* implicit write back */
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+ B _align
+
+_aligned: /* is s2 now aligned? */
+ TST $3, R2
+ BNE _unaligned
+
+_aloop:
+ MOVW.P 4(R1), R5 /* 4 at a time */
+ MOVW.P 4(R2), R7
+
+ AND R5, R3, R4
+ AND R7, R3, R8
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+
+ AND R5>>8, R3, R4
+ AND R7>>8, R3, R8
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+
+ AND R5>>16, R3, R4
+ AND R7>>16, R3, R8
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+
+ AND R5>>24, R3, R4
+ AND R7>>24, R3, R8
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+
+ B _aloop
+
+_return:
+ RET
+
+_unaligned:
+ MOVBU.P 1(R1), R4 /* implicit write back */
+ MOVBU.P 1(R2), R8 /* implicit write back */
+ SUB.S R8, R4, R0
+ BNE _return
+ CMP $0, R4
+ BEQ _return
+ B _unaligned
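
Each word comparison in _aloop is four byte-lane compares; one
iteration looks like this in C (a model, little-endian assumed):

	int
	cmpword(unsigned w1, unsigned w2, int *stop)
	{
		int sh;
		unsigned b1, b2;

		for(sh = 0; sh < 32; sh += 8){
			b1 = (w1 >> sh) & 0xFF;	/* AND R5>>sh, R3, R4 */
			b2 = (w2 >> sh) & 0xFF;
			if(b1 != b2 || b1 == 0){	/* difference or NUL */
				*stop = 1;
				return (int)b1 - (int)b2;	/* SUB.S R8, R4, R0 */
			}
		}
		*stop = 0;	/* all four lanes equal and non-NUL */
		return 0;
	}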
diff --git a/sys/src/ape/lib/ap/arm/strcpy.s b/sys/src/ape/lib/ap/arm/strcpy.s
new file mode 100755
index 000000000..3e69fdc7d
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/strcpy.s
@@ -0,0 +1,46 @@
+TEXT strcpy(SB), $-4
+ MOVW R0, to+0(FP) /* need to save for return value */
+ MOVW from+4(FP), R1
+ MOVW $0xFF, R2 /* mask */
+
+salign: /* align source on 4 */
+ AND.S $3, R1, R3
+ BEQ dalign
+ MOVBU.P 1(R1), R3 /* implicit write back */
+ TST R3, R2
+ MOVBU.P R3, 1(R0) /* implicit write back */
+ BNE salign
+ B return
+
+dalign: /* is destination now aligned? */
+ AND.S $3, R0, R3
+ BNE uloop
+
+aloop:
+ MOVW.P 4(R1), R4 /* read 4, write 4 */
+ TST R4, R2 /* AND.S R3, R2, Rx */
+ TST.NE R4>>8, R2
+ TST.NE R4>>16, R2
+ TST.NE R4>>24, R2
+ BEQ tail
+ MOVW.P R4, 4(R0)
+ B aloop
+
+uloop:
+ MOVW.P 4(R1), R4 /* read 4, write 1,1,1,1 */
+
+tail:
+ AND.S R4, R2, R3
+ MOVBU.NE.P R3, 1(R0)
+ AND.NE.S R4>>8, R2, R3
+ MOVBU.NE.P R3, 1(R0)
+ AND.NE.S R4>>16, R2, R3
+ MOVBU.NE.P R3, 1(R0)
+ AND.NE.S R4>>24, R2, R3
+ MOVBU.P R3, 1(R0)
+ BNE uloop
+ B return
+
+return:
+ MOVW to+0(FP), R0
+ RET
diff --git a/sys/src/ape/lib/ap/arm/tas.s b/sys/src/ape/lib/ap/arm/tas.s
new file mode 100755
index 000000000..f1269209e
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/tas.s
@@ -0,0 +1,5 @@
+TEXT tas(SB), $-4
+ MOVW R0,R1
+ MOVW $1,R0
+ SWPW R0,(R1)
+ RET
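
SWPW atomically swaps R0 into the lock word and returns the old value
in R0. What it does, modeled (non-atomically) in C:

	/* The two steps below are one atomic SWP in the assembly;
	 * this C version is only a model. */
	int
	tas_model(int *p)
	{
		int old;

		old = *p;
		*p = 1;
		return old;	/* 0 means the lock was free and is now held */
	}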
diff --git a/sys/src/ape/lib/ap/arm/vlop.s b/sys/src/ape/lib/ap/arm/vlop.s
new file mode 100755
index 000000000..3a5375541
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/vlop.s
@@ -0,0 +1,13 @@
+TEXT _mulv(SB), $0
+ MOVW 4(FP),R8 /* l0 */
+ MOVW 8(FP),R11 /* h0 */
+ MOVW 12(FP),R4 /* l1 */
+ MOVW 16(FP),R5 /* h1 */
+ MULLU R8,R4,(R6, R7) /* l0*l1 */
+ MUL R8,R5,R5 /* l0*h1 */
+ MUL R11,R4,R4 /* h0*l1 */
+ ADD R4,R6
+ ADD R5,R6
+ MOVW R6,4(R0)
+ MOVW R7,0(R0)
+ RET
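
_mulv forms the low 64 bits of a 64x64-bit product from 32-bit halves;
the h0*h1 term is dropped because it only affects bits 64 and up. A C
model, assuming 32-bit ulong as on the original target:

	typedef struct { unsigned long lo, hi; } Vlong;	/* as in vlrt.c below */

	void
	mulv_model(Vlong *r, Vlong a, Vlong b)
	{
		unsigned long long p = (unsigned long long)a.lo * b.lo;	/* MULLU */

		r->lo = (unsigned long)p;
		r->hi = (unsigned long)(p >> 32) + a.lo*b.hi + a.hi*b.lo;
	}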
diff --git a/sys/src/ape/lib/ap/arm/vlrt.c b/sys/src/ape/lib/ap/arm/vlrt.c
new file mode 100755
index 000000000..5e9524d34
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/vlrt.c
@@ -0,0 +1,708 @@
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef signed char schar;
+
+#define SIGN(n) (1UL<<(n-1))
+
+typedef struct Vlong Vlong;
+struct Vlong
+{
+ ulong lo;
+ ulong hi;
+};
+
+void abort(void);
+
+/* needed by profiler; can't be profiled */
+#pragma profile off
+
+void
+_addv(Vlong *r, Vlong a, Vlong b)
+{
+ ulong lo, hi;
+
+ lo = a.lo + b.lo;
+ hi = a.hi + b.hi;
+ if(lo < a.lo)
+ hi++;
+ r->lo = lo;
+ r->hi = hi;
+}
+
+void
+_subv(Vlong *r, Vlong a, Vlong b)
+{
+ ulong lo, hi;
+
+ lo = a.lo - b.lo;
+ hi = a.hi - b.hi;
+ if(lo > a.lo)
+ hi--;
+ r->lo = lo;
+ r->hi = hi;
+}
+
+#pragma profile on
+
+void
+_d2v(Vlong *y, double d)
+{
+ union { double d; struct Vlong; } x;	/* unnamed struct member: Plan 9 C extension */
+ ulong xhi, xlo, ylo, yhi;
+ int sh;
+
+ x.d = d;
+
+ xhi = (x.hi & 0xfffff) | 0x100000;
+ xlo = x.lo;
+ sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+ ylo = 0;
+ yhi = 0;
+ if(sh >= 0) {
+ /* v = (hi||lo) >> sh */
+ if(sh < 32) {
+ if(sh == 0) {
+ ylo = xlo;
+ yhi = xhi;
+ } else {
+ ylo = (xlo >> sh) | (xhi << (32-sh));
+ yhi = xhi >> sh;
+ }
+ } else {
+ if(sh == 32) {
+ ylo = xhi;
+ } else
+ if(sh < 64) {
+ ylo = xhi >> (sh-32);
+ }
+ }
+ } else {
+ /* v = (hi||lo) << -sh */
+ sh = -sh;
+ if(sh <= 10) {
+ ylo = xlo << sh;
+ yhi = (xhi << sh) | (xlo >> (32-sh));
+ } else {
+ /* overflow */
+ yhi = d; /* causes something awful */
+ }
+ }
+ if(x.hi & SIGN(32)) {
+ if(ylo != 0) {
+ ylo = -ylo;
+ yhi = ~yhi;
+ } else
+ yhi = -yhi;
+ }
+
+ y->hi = yhi;
+ y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+ _d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+ if(x.hi & SIGN(32)) {
+ if(x.lo) {
+ x.lo = -x.lo;
+ x.hi = ~x.hi;
+ } else
+ x.hi = -x.hi;
+ return -((long)x.hi*4294967296. + x.lo);
+ }
+ return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+ return _v2d(x);
+}
+
+
+static void
+dodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
+{
+ ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+ int i;
+
+ numhi = num.hi;
+ numlo = num.lo;
+ denhi = den.hi;
+ denlo = den.lo;
+ /*
+ * get a divide by zero
+ */
+ if(denlo==0 && denhi==0) {
+ numlo = numlo / denlo;
+ }
+
+ /*
+ * set up the divisor and find the number of iterations needed
+ */
+ if(numhi >= SIGN(32)) {
+ quohi = SIGN(32);
+ quolo = 0;
+ } else {
+ quohi = numhi;
+ quolo = numlo;
+ }
+ i = 0;
+ while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+ denhi = (denhi<<1) | (denlo>>31);
+ denlo <<= 1;
+ i++;
+ }
+
+ quohi = 0;
+ quolo = 0;
+ for(; i >= 0; i--) {
+ quohi = (quohi<<1) | (quolo>>31);
+ quolo <<= 1;
+ if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+ t = numlo;
+ numlo -= denlo;
+ if(numlo > t)
+ numhi--;
+ numhi -= denhi;
+ quolo |= 1;
+ }
+ denlo = (denlo>>1) | (denhi<<31);
+ denhi >>= 1;
+ }
+
+ if(q) {
+ q->lo = quolo;
+ q->hi = quohi;
+ }
+ if(r) {
+ r->lo = numlo;
+ r->hi = numhi;
+ }
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+ if(n.hi == 0 && d.hi == 0) {
+ q->hi = 0;
+ q->lo = n.lo / d.lo;
+ return;
+ }
+ dodiv(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+ if(n.hi == 0 && d.hi == 0) {
+ r->hi = 0;
+ r->lo = n.lo % d.lo;
+ return;
+ }
+ dodiv(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+ if(v->lo == 0) {
+ v->hi = -v->hi;
+ return;
+ }
+ v->lo = -v->lo;
+ v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ q->lo = (long)n.lo / (long)d.lo;
+ q->hi = ((long)q->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ dodiv(n, d, q, 0);
+ if(nneg != dneg)
+ vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+ long nneg, dneg;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ r->lo = (long)n.lo % (long)d.lo;
+ r->hi = ((long)r->lo) >> 31;
+ return;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ dodiv(n, d, 0, r);
+ if(nneg)
+ vneg(r);
+}
+
+void
+_rshav(Vlong *r, Vlong a, int b)
+{
+ long t;
+
+ t = a.hi;
+ if(b >= 32) {
+ r->hi = t>>31;
+ if(b >= 64) {
+ /* this is illegal re C standard */
+ r->lo = t>>31;
+ return;
+ }
+ r->lo = t >> (b-32);
+ return;
+ }
+ if(b <= 0) {
+ r->hi = t;
+ r->lo = a.lo;
+ return;
+ }
+ r->hi = t >> b;
+ r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_rshlv(Vlong *r, Vlong a, int b)
+{
+ ulong t;
+
+ t = a.hi;
+ if(b >= 32) {
+ r->hi = 0;
+ if(b >= 64) {
+ /* this is illegal re C standard */
+ r->lo = 0;
+ return;
+ }
+ r->lo = t >> (b-32);
+ return;
+ }
+ if(b <= 0) {
+ r->hi = t;
+ r->lo = a.lo;
+ return;
+ }
+ r->hi = t >> b;
+ r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_lshv(Vlong *r, Vlong a, int b)
+{
+ ulong t;
+
+ t = a.lo;
+ if(b >= 32) {
+ r->lo = 0;
+ if(b >= 64) {
+ /* this is illegal re C standard */
+ r->hi = 0;
+ return;
+ }
+ r->hi = t << (b-32);
+ return;
+ }
+ if(b <= 0) {
+ r->lo = t;
+ r->hi = a.hi;
+ return;
+ }
+ r->lo = t << b;
+ r->hi = (t >> (32-b)) | (a.hi << b);
+}
+
+void
+_andv(Vlong *r, Vlong a, Vlong b)
+{
+ r->hi = a.hi & b.hi;
+ r->lo = a.lo & b.lo;
+}
+
+void
+_orv(Vlong *r, Vlong a, Vlong b)
+{
+ r->hi = a.hi | b.hi;
+ r->lo = a.lo | b.lo;
+}
+
+void
+_xorv(Vlong *r, Vlong a, Vlong b)
+{
+ r->hi = a.hi ^ b.hi;
+ r->lo = a.lo ^ b.lo;
+}
+
+void
+_vpp(Vlong *l, Vlong *r)
+{
+
+ l->hi = r->hi;
+ l->lo = r->lo;
+ r->lo++;
+ if(r->lo == 0)
+ r->hi++;
+}
+
+void
+_vmm(Vlong *l, Vlong *r)
+{
+
+ l->hi = r->hi;
+ l->lo = r->lo;
+ if(r->lo == 0)
+ r->hi--;
+ r->lo--;
+}
+
+void
+_ppv(Vlong *l, Vlong *r)
+{
+
+ r->lo++;
+ if(r->lo == 0)
+ r->hi++;
+ l->hi = r->hi;
+ l->lo = r->lo;
+}
+
+void
+_mmv(Vlong *l, Vlong *r)
+{
+
+ if(r->lo == 0)
+ r->hi--;
+ r->lo--;
+ l->hi = r->hi;
+ l->lo = r->lo;
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+ Vlong t, u;
+
+ u = *ret;
+ switch(type) {
+ default:
+ abort();
+ break;
+
+ case 1: /* schar */
+ t.lo = *(schar*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(schar*)lv = u.lo;
+ break;
+
+ case 2: /* uchar */
+ t.lo = *(uchar*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uchar*)lv = u.lo;
+ break;
+
+ case 3: /* short */
+ t.lo = *(short*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(short*)lv = u.lo;
+ break;
+
+ case 4: /* ushort */
+ t.lo = *(ushort*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ushort*)lv = u.lo;
+ break;
+
+ case 9: /* int */
+ t.lo = *(int*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(int*)lv = u.lo;
+ break;
+
+ case 10: /* uint */
+ t.lo = *(uint*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(uint*)lv = u.lo;
+ break;
+
+ case 5: /* long */
+ t.lo = *(long*)lv;
+ t.hi = t.lo >> 31;
+ fn(&u, t, rv);
+ *(long*)lv = u.lo;
+ break;
+
+ case 6: /* ulong */
+ t.lo = *(ulong*)lv;
+ t.hi = 0;
+ fn(&u, t, rv);
+ *(ulong*)lv = u.lo;
+ break;
+
+ case 7: /* vlong */
+ case 8: /* uvlong */
+ fn(&u, *(Vlong*)lv, rv);
+ *(Vlong*)lv = u;
+ break;
+ }
+ *ret = u;
+}
+
+void
+_p2v(Vlong *ret, void *p)
+{
+ long t;
+
+ t = (ulong)p;
+ ret->lo = t;
+ ret->hi = 0;
+}
+
+void
+_sl2v(Vlong *ret, long sl)
+{
+ long t;
+
+ t = sl;
+ ret->lo = t;
+ ret->hi = t >> 31;
+}
+
+void
+_ul2v(Vlong *ret, ulong ul)
+{
+ long t;
+
+ t = ul;
+ ret->lo = t;
+ ret->hi = 0;
+}
+
+void
+_si2v(Vlong *ret, int si)
+{
+ long t;
+
+ t = si;
+ ret->lo = t;
+ ret->hi = t >> 31;
+}
+
+void
+_ui2v(Vlong *ret, uint ui)
+{
+ long t;
+
+ t = ui;
+ ret->lo = t;
+ ret->hi = 0;
+}
+
+void
+_sh2v(Vlong *ret, long sh)
+{
+ long t;
+
+ t = (sh << 16) >> 16;
+ ret->lo = t;
+ ret->hi = t >> 31;
+}
+
+void
+_uh2v(Vlong *ret, ulong ul)
+{
+ long t;
+
+ t = ul & 0xffff;
+ ret->lo = t;
+ ret->hi = 0;
+}
+
+void
+_sc2v(Vlong *ret, long uc)
+{
+ long t;
+
+ t = (uc << 24) >> 24;
+ ret->lo = t;
+ ret->hi = t >> 31;
+}
+
+void
+_uc2v(Vlong *ret, ulong ul)
+{
+ long t;
+
+ t = ul & 0xff;
+ ret->lo = t;
+ ret->hi = 0;
+}
+
+long
+_v2sc(Vlong rv)
+{
+ long t;
+
+ t = rv.lo & 0xff;
+ return (t << 24) >> 24;
+}
+
+long
+_v2uc(Vlong rv)
+{
+
+ return rv.lo & 0xff;
+}
+
+long
+_v2sh(Vlong rv)
+{
+ long t;
+
+ t = rv.lo & 0xffff;
+ return (t << 16) >> 16;
+}
+
+long
+_v2uh(Vlong rv)
+{
+
+ return rv.lo & 0xffff;
+}
+
+long
+_v2sl(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+long
+_v2ul(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+long
+_v2si(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+long
+_v2ui(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+int
+_testv(Vlong rv)
+{
+ return rv.lo || rv.hi;
+}
+
+int
+_eqv(Vlong lv, Vlong rv)
+{
+ return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+int
+_nev(Vlong lv, Vlong rv)
+{
+ return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+int
+_ltv(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi < (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lev(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi < (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_gtv(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi > (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_gev(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi > (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
+int
+_lov(Vlong lv, Vlong rv)
+{
+ return lv.hi < rv.hi ||
+ (lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lsv(Vlong lv, Vlong rv)
+{
+ return lv.hi < rv.hi ||
+ (lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_hiv(Vlong lv, Vlong rv)
+{
+ return lv.hi > rv.hi ||
+ (lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_hsv(Vlong lv, Vlong rv)
+{
+ return lv.hi > rv.hi ||
+ (lv.hi == rv.hi && lv.lo >= rv.lo);
+}