author    | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300
committer | Taru Karttunen <taruti@taruti.net> | 2011-03-30 15:46:40 +0300
commit    | e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree      | d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/ape/lib/ap/arm
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/ape/lib/ap/arm')
-rwxr-xr-x | sys/src/ape/lib/ap/arm/cycles.c     |   5
-rwxr-xr-x | sys/src/ape/lib/ap/arm/div.s        | 118
-rwxr-xr-x | sys/src/ape/lib/ap/arm/getfcr.s     |  16
-rwxr-xr-x | sys/src/ape/lib/ap/arm/lock.c       |  26
-rwxr-xr-x | sys/src/ape/lib/ap/arm/main9.s      |  17
-rwxr-xr-x | sys/src/ape/lib/ap/arm/main9p.s     |  52
-rwxr-xr-x | sys/src/ape/lib/ap/arm/memmove.s    | 212
-rwxr-xr-x | sys/src/ape/lib/ap/arm/memset.s     |  60
-rwxr-xr-x | sys/src/ape/lib/ap/arm/mkfile       |  25
-rwxr-xr-x | sys/src/ape/lib/ap/arm/notetramp.c  |  72
-rwxr-xr-x | sys/src/ape/lib/ap/arm/setjmp.s     |  29
-rwxr-xr-x | sys/src/ape/lib/ap/arm/strchr.s     |  56
-rwxr-xr-x | sys/src/ape/lib/ap/arm/strcmp.s     |  67
-rwxr-xr-x | sys/src/ape/lib/ap/arm/strcpy.s     |  46
-rwxr-xr-x | sys/src/ape/lib/ap/arm/tas.s        |   5
-rwxr-xr-x | sys/src/ape/lib/ap/arm/vlop.s       |  13
-rwxr-xr-x | sys/src/ape/lib/ap/arm/vlrt.c       | 708
17 files changed, 1527 insertions, 0 deletions
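Most of this import backs the compiler runtime and APE program startup rather than being called directly, but lock.c (together with the SWPW-based tas in tas.s) does expose a small spinlock API: lock, canlock, and unlock over a Lock. A minimal usage sketch, assuming APE's <lock.h> and the Plan 9 build environment; the counter and its lock are hypothetical and not part of this import:

#include <lock.h>	/* APE lock extension: Lock, lock(), canlock(), unlock() */

static Lock cntlock;	/* hypothetical: protects cnt */
static int cnt;

/* bump the counter under the test-and-set spinlock from lock.c/tas.s */
void
incr(void)
{
	lock(&cntlock);		/* spins on tas(), calling _SLEEP(0) between attempts */
	cnt++;
	unlock(&cntlock);	/* releasing is a plain store of 0 */
}

/* non-blocking variant: back off if another process holds the lock */
int
tryincr(void)
{
	if(!canlock(&cntlock))
		return 0;
	cnt++;
	unlock(&cntlock);
	return 1;
}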
diff --git a/sys/src/ape/lib/ap/arm/cycles.c b/sys/src/ape/lib/ap/arm/cycles.c new file mode 100755 index 000000000..1c32bc732 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/cycles.c @@ -0,0 +1,5 @@ +void +_cycles(unsigned long long *u) +{ + *u = 0; +} diff --git a/sys/src/ape/lib/ap/arm/div.s b/sys/src/ape/lib/ap/arm/div.s new file mode 100755 index 000000000..2f7699c50 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/div.s @@ -0,0 +1,118 @@ +Q = 0 +N = 1 +D = 2 +CC = 3 +TMP = 11 + +TEXT save<>(SB), 1, $0 + MOVW R(Q), 0(FP) + MOVW R(N), 4(FP) + MOVW R(D), 8(FP) + MOVW R(CC), 12(FP) + + MOVW R(TMP), R(Q) /* numerator */ + MOVW 20(FP), R(D) /* denominator */ + CMP $0, R(D) + BNE s1 + MOVW -1(R(D)), R(TMP) /* divide by zero fault */ +s1: RET + +TEXT rest<>(SB), 1, $0 + MOVW 0(FP), R(Q) + MOVW 4(FP), R(N) + MOVW 8(FP), R(D) + MOVW 12(FP), R(CC) +/* + * return to caller + * of rest<> + */ + MOVW 0(R13), R14 + ADD $20, R13 + B (R14) + +TEXT div<>(SB), 1, $0 + MOVW $32, R(CC) +/* + * skip zeros 8-at-a-time + */ +e1: + AND.S $(0xff<<24),R(Q), R(N) + BNE e2 + SLL $8, R(Q) + SUB.S $8, R(CC) + BNE e1 + RET +e2: + MOVW $0, R(N) + +loop: +/* + * shift R(N||Q) left one + */ + SLL $1, R(N) + CMP $0, R(Q) + ORR.LT $1, R(N) + SLL $1, R(Q) + +/* + * compare numerator to denominator + * if less, subtract and set quotent bit + */ + CMP R(D), R(N) + ORR.HS $1, R(Q) + SUB.HS R(D), R(N) + SUB.S $1, R(CC) + BNE loop + RET + +TEXT _div(SB), 1, $16 + BL save<>(SB) + CMP $0, R(Q) + BGE d1 + RSB $0, R(Q), R(Q) + CMP $0, R(D) + BGE d2 + RSB $0, R(D), R(D) +d0: + BL div<>(SB) /* none/both neg */ + MOVW R(Q), R(TMP) + B out +d1: + CMP $0, R(D) + BGE d0 + RSB $0, R(D), R(D) +d2: + BL div<>(SB) /* one neg */ + RSB $0, R(Q), R(TMP) + B out + +TEXT _mod(SB), 1, $16 + BL save<>(SB) + CMP $0, R(D) + RSB.LT $0, R(D), R(D) + CMP $0, R(Q) + BGE m1 + RSB $0, R(Q), R(Q) + BL div<>(SB) /* neg numerator */ + RSB $0, R(N), R(TMP) + B out +m1: + BL div<>(SB) /* pos numerator */ + MOVW R(N), R(TMP) + B out + +TEXT _divu(SB), 1, $16 + BL save<>(SB) + BL div<>(SB) + MOVW R(Q), R(TMP) + B out + +TEXT _modu(SB), 1, $16 + BL save<>(SB) + BL div<>(SB) + MOVW R(N), R(TMP) + B out + +out: + BL rest<>(SB) + B out diff --git a/sys/src/ape/lib/ap/arm/getfcr.s b/sys/src/ape/lib/ap/arm/getfcr.s new file mode 100755 index 000000000..b27c7f082 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/getfcr.s @@ -0,0 +1,16 @@ + +TEXT setfcr(SB), $4 + MOVW R0, FPCR + RET + +TEXT getfcr(SB), $4 + MOVW FPCR, R0 + RET + +TEXT getfsr(SB), $0 + MOVW FPSR, R0 + RET + +TEXT setfsr(SB), $0 + MOVW R0, FPSR + RET diff --git a/sys/src/ape/lib/ap/arm/lock.c b/sys/src/ape/lib/ap/arm/lock.c new file mode 100755 index 000000000..91c0ba233 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/lock.c @@ -0,0 +1,26 @@ +#define _LOCK_EXTENSION +#include "../plan9/sys9.h" +#include <lock.h> + +int tas(int*); + +void +lock(Lock *lk) +{ + while(tas(&lk->val)) + _SLEEP(0); +} + +int +canlock(Lock *lk) +{ + if(tas(&lk->val)) + return 0; + return 1; +} + +void +unlock(Lock *lk) +{ + lk->val = 0; +} diff --git a/sys/src/ape/lib/ap/arm/main9.s b/sys/src/ape/lib/ap/arm/main9.s new file mode 100755 index 000000000..cd04bea17 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/main9.s @@ -0,0 +1,17 @@ +arg=0 +sp=13 +sb=12 + +TEXT _main(SB), 1, $16 + MOVW $setR12(SB), R(sb) + BL _envsetup(SB) + MOVW $inargv+0(FP), R(arg) + MOVW R(arg), 8(R(sp)) + MOVW inargc-4(FP), R(arg) + MOVW R(arg), 4(R(sp)) + BL main(SB) +loop: + MOVW R(arg), 4(R(sp)) + BL exit(SB) + BL _div(SB) + B loop diff --git a/sys/src/ape/lib/ap/arm/main9p.s 
b/sys/src/ape/lib/ap/arm/main9p.s new file mode 100755 index 000000000..f5dc65344 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/main9p.s @@ -0,0 +1,52 @@ +arg=0 +sp=13 +sb=12 + +#define NPRIVATES 16 + +GLOBL _tos(SB), $4 +GLOBL _privates(SB), $4 +GLOBL _nprivates(SB), $4 + +TEXT _mainp(SB), 1, $(3*4+NPRIVATES*4) + + MOVW $setR12(SB), R(sb) + + /* _tos = arg */ + MOVW R(arg), _tos(SB) + MOVW $private+8(SP), R1 + MOVW R1, _privates(SB) + MOVW $NPRIVATES, R1 + MOVW R1, _nprivates(SB) + + /* _profmain(); */ + BL _profmain(SB) + + /* _tos->prof.pp = _tos->prof.next; */ + MOVW _tos+0(SB),R1 + MOVW 4(R1), R2 + MOVW R2, 0(R1) + + BL _envsetup(SB) + + /* main(argc, argv, environ); */ + MOVW $inargv+0(FP), R(arg) + MOVW R(arg), 8(R(sp)) + MOVW inargc-4(FP), R(arg) + MOVW R(arg), 4(R(sp)) + MOVW environ(SB), R(arg) + MOVW R(arg), 8(R(sp)) + BL main(SB) +loop: + MOVW R(arg), 4(R(sp)) + BL exit(SB) + MOVW $_div(SB), R(arg) /* force loading of div */ + MOVW $_profin(SB), R(arg) /* force loading of profile */ + B loop + +TEXT _savearg(SB), 1, $0 + RET + +TEXT _callpc(SB), 1, $0 + MOVW argp-4(FP), R(arg) + RET diff --git a/sys/src/ape/lib/ap/arm/memmove.s b/sys/src/ape/lib/ap/arm/memmove.s new file mode 100755 index 000000000..346a23d72 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/memmove.s @@ -0,0 +1,212 @@ +TS = 0 +TE = 1 +FROM = 2 +N = 3 +TMP = 3 /* N and TMP don't overlap */ +TMP1 = 4 + +TEXT memcpy(SB), $-4 + B _memmove +TEXT memmove(SB), $-4 +_memmove: + MOVW R(TS), to+0(FP) /* need to save for return value */ + MOVW from+4(FP), R(FROM) + MOVW n+8(FP), R(N) + + ADD R(N), R(TS), R(TE) /* to end pointer */ + + CMP R(FROM), R(TS) + BLS _forward + +_back: + ADD R(N), R(FROM) /* from end pointer */ + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _b1tail + +_b4align: /* align destination on 4 */ + AND.S $3, R(TE), R(TMP) + BEQ _b4aligned + + MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ + MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ + B _b4align + +_b4aligned: /* is source now aligned? */ + AND.S $3, R(FROM), R(TMP) + BNE _bunaligned + + ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */ +_b32loop: + CMP R(TMP), R(TE) + BLS _b4tail + + MOVM.DB.W (R(FROM)), [R4-R7] + MOVM.DB.W [R4-R7], (R(TE)) + MOVM.DB.W (R(FROM)), [R4-R7] + MOVM.DB.W [R4-R7], (R(TE)) + B _b32loop + +_b4tail: /* do remaining words if possible */ + ADD $3, R(TS), R(TMP) +_b4loop: + CMP R(TMP), R(TE) + BLS _b1tail + + MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */ + MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */ + B _b4loop + +_b1tail: /* remaining bytes */ + CMP R(TE), R(TS) + BEQ _return + + MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ + MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ + B _b1tail + +_forward: + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _f1tail + +_f4align: /* align destination on 4 */ + AND.S $3, R(TS), R(TMP) + BEQ _f4aligned + + MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ + MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ + B _f4align + +_f4aligned: /* is source now aligned? 
*/ + AND.S $3, R(FROM), R(TMP) + BNE _funaligned + + SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */ +_f32loop: + CMP R(TMP), R(TS) + BHS _f4tail + + MOVM.IA.W (R(FROM)), [R4-R7] + MOVM.IA.W [R4-R7], (R(TS)) + MOVM.IA.W (R(FROM)), [R4-R7] + MOVM.IA.W [R4-R7], (R(TS)) + B _f32loop + +_f4tail: + SUB $3, R(TE), R(TMP) /* do remaining words if possible */ +_f4loop: + CMP R(TMP), R(TS) + BHS _f1tail + + MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ + MOVW.P R4, 4(R(TS)) /* implicit write back */ + B _f4loop + +_f1tail: + CMP R(TS), R(TE) + BEQ _return + + MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ + MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ + B _f1tail + +_return: + MOVW to+0(FP), R0 + RET + +RSHIFT = 4 +LSHIFT = 5 +OFFSET = 11 + +BR0 = 6 +BW0 = 7 +BR1 = 7 +BW1 = 8 + +_bunaligned: + CMP $2, R(TMP) /* is R(TMP) < 2 ? */ + + MOVW.LT $8, R(RSHIFT) /* (R(n)<<24)|(R(n-1)>>8) */ + MOVW.LT $24, R(LSHIFT) + MOVW.LT $1, R(OFFSET) + + MOVW.EQ $16, R(RSHIFT) /* (R(n)<<16)|(R(n-1)>>16) */ + MOVW.EQ $16, R(LSHIFT) + MOVW.EQ $2, R(OFFSET) + + MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */ + MOVW.GT $8, R(LSHIFT) + MOVW.GT $3, R(OFFSET) + + ADD $8, R(TS), R(TMP) /* do 8-byte chunks if possible */ + CMP R(TMP), R(TE) + BLS _b1tail + + BIC $3, R(FROM) /* align source */ + MOVW (R(FROM)), R(BR0) /* prime first block register */ + +_bu8loop: + CMP R(TMP), R(TE) + BLS _bu1tail + + MOVW R(BR0)<<R(LSHIFT), R(BW1) + MOVM.DB.W (R(FROM)), [R(BR0)-R(BR1)] + ORR R(BR1)>>R(RSHIFT), R(BW1) + + MOVW R(BR1)<<R(LSHIFT), R(BW0) + ORR R(BR0)>>R(RSHIFT), R(BW0) + + MOVM.DB.W [R(BW0)-R(BW1)], (R(TE)) + B _bu8loop + +_bu1tail: + ADD R(OFFSET), R(FROM) + B _b1tail + +RSHIFT = 4 +LSHIFT = 5 +OFFSET = 11 + +FW0 = 6 +FR0 = 7 +FW1 = 7 +FR1 = 8 + +_funaligned: + CMP $2, R(TMP) + + MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */ + MOVW.LT $24, R(LSHIFT) + MOVW.LT $3, R(OFFSET) + + MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */ + MOVW.EQ $16, R(LSHIFT) + MOVW.EQ $2, R(OFFSET) + + MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */ + MOVW.GT $8, R(LSHIFT) + MOVW.GT $1, R(OFFSET) + + SUB $8, R(TE), R(TMP) /* do 8-byte chunks if possible */ + CMP R(TMP), R(TS) + BHS _f1tail + + BIC $3, R(FROM) /* align source */ + MOVW.P 4(R(FROM)), R(FR1) /* prime last block register, implicit write back */ + +_fu8loop: + CMP R(TMP), R(TS) + BHS _fu1tail + + MOVW R(FR1)>>R(RSHIFT), R(FW0) + MOVM.IA.W (R(FROM)), [R(FR0)-R(FR1)] + ORR R(FR0)<<R(LSHIFT), R(FW0) + + MOVW R(FR0)>>R(RSHIFT), R(FW1) + ORR R(FR1)<<R(LSHIFT), R(FW1) + + MOVM.IA.W [R(FW0)-R(FW1)], (R(TS)) + B _fu8loop + +_fu1tail: + SUB R(OFFSET), R(FROM) + B _f1tail diff --git a/sys/src/ape/lib/ap/arm/memset.s b/sys/src/ape/lib/ap/arm/memset.s new file mode 100755 index 000000000..7ebbb44c3 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/memset.s @@ -0,0 +1,60 @@ +TO = 1 +TOE = 2 +N = 3 +TMP = 3 /* N and TMP don't overlap */ + +TEXT memset(SB), $0 + MOVW R0, R(TO) + MOVW data+4(FP), R(4) + MOVW n+8(FP), R(N) + + ADD R(N), R(TO), R(TOE) /* to end pointer */ + + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _1tail + + AND $0xFF, R(4) + ORR R(4)<<8, R(4) + ORR R(4)<<16, R(4) /* replicate to word */ + +_4align: /* align on 4 */ + AND.S $3, R(TO), R(TMP) + BEQ _4aligned + + MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + B _4align + +_4aligned: + SUB $15, R(TOE), R(TMP) /* do 16-byte chunks if possible */ + CMP R(TMP), R(TO) + BHS _4tail + + MOVW R4, R5 /* replicate */ + MOVW R4, R6 + MOVW R4, R7 + +_f16loop: + CMP R(TMP), R(TO) + BHS _4tail + + 
MOVM.IA.W [R4-R7], (R(TO)) + B _f16loop + +_4tail: + SUB $3, R(TOE), R(TMP) /* do remaining words if possible */ +_4loop: + CMP R(TMP), R(TO) + BHS _1tail + + MOVW.P R(4), 4(R(TO)) /* implicit write back */ + B _4loop + +_1tail: + CMP R(TO), R(TOE) + BEQ _return + + MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + B _1tail + +_return: + RET diff --git a/sys/src/ape/lib/ap/arm/mkfile b/sys/src/ape/lib/ap/arm/mkfile new file mode 100755 index 000000000..f65e34395 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/mkfile @@ -0,0 +1,25 @@ +APE=/sys/src/ape +<$APE/config +LIB=/$objtype/lib/ape/libap.a +OFILES=\ + cycles.$O\ + div.$O\ + getfcr.$O\ + lock.$O\ + main9.$O\ + main9p.$O\ + memmove.$O\ + memset.$O\ + notetramp.$O\ + setjmp.$O\ + strchr.$O\ + strcmp.$O\ + strcpy.$O\ + tas.$O\ + vlop.$O\ + vlrt.$O\ + +</sys/src/cmd/mksyslib + +CFLAGS=-c -D_POSIX_SOURCE -D_PLAN9_SOURCE + diff --git a/sys/src/ape/lib/ap/arm/notetramp.c b/sys/src/ape/lib/ap/arm/notetramp.c new file mode 100755 index 000000000..6b1be6641 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/notetramp.c @@ -0,0 +1,72 @@ +#include "../plan9/lib.h" +#include "../plan9/sys9.h" +#include <signal.h> +#include <setjmp.h> + +/* A stack to hold pcs when signals nest */ +#define MAXSIGSTACK 20 +typedef struct Pcstack Pcstack; +static struct Pcstack { + int sig; + void (*hdlr)(int, char*, Ureg*); + unsigned long restorepc; + Ureg *u; +} pcstack[MAXSIGSTACK]; +static int nstack = 0; + +static void notecont(Ureg*, char*); + +void +_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u) +{ + Pcstack *p; + + if(nstack >= MAXSIGSTACK) + _NOTED(1); /* nesting too deep; just do system default */ + p = &pcstack[nstack]; + p->restorepc = u->pc; + p->sig = sig; + p->hdlr = hdlr; + p->u = u; + nstack++; + u->pc = (unsigned long) notecont; + _NOTED(2); /* NSAVE: clear note but hold state */ +} + +static void +notecont(Ureg *u, char *s) +{ + Pcstack *p; + void(*f)(int, char*, Ureg*); + + p = &pcstack[nstack-1]; + f = p->hdlr; + u->pc = p->restorepc; + nstack--; + (*f)(p->sig, s, u); + _NOTED(3); /* NRSTR */ +} + +#define JMPBUFPC 1 +#define JMPBUFSP 0 + +extern sigset_t _psigblocked; + +void +siglongjmp(sigjmp_buf j, int ret) +{ + struct Ureg *u; + + if(j[0]) + _psigblocked = j[1]; + if(nstack == 0 || pcstack[nstack-1].u->sp > j[2+JMPBUFSP]) + longjmp(j+2, ret); + u = pcstack[nstack-1].u; + nstack--; + u->r0 = ret; + if(ret == 0) + u->r0 = 1; + u->pc = j[2+JMPBUFPC]; + u->sp = j[2+JMPBUFSP]; + _NOTED(3); /* NRSTR */ +} diff --git a/sys/src/ape/lib/ap/arm/setjmp.s b/sys/src/ape/lib/ap/arm/setjmp.s new file mode 100755 index 000000000..62a06245c --- /dev/null +++ b/sys/src/ape/lib/ap/arm/setjmp.s @@ -0,0 +1,29 @@ +arg=0 +link=14 +sp=13 + +TEXT setjmp(SB), 1, $-4 + MOVW R(sp), (R(arg+0)) + MOVW R(link), 4(R(arg+0)) + MOVW $0, R0 + RET + +TEXT sigsetjmp(SB), 1, $-4 + MOVW savemask+4(FP), R(arg+2) + MOVW R(arg+2), 0(R(arg+0)) + MOVW $_psigblocked(SB), R(arg+2) + MOVW R2, 4(R(arg+0)) + MOVW R(sp), 8(R(arg+0)) + MOVW R(link), 12(R(arg+0)) + MOVW $0, R(arg+0) + RET + +TEXT longjmp(SB), 1, $-4 + MOVW r+4(FP), R(arg+2) + CMP $0, R(arg+2) + BNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVW $1, R(arg+2) /* bless their pointed heads */ +ok: MOVW (R(arg+0)), R(sp) + MOVW 4(R(arg+0)), R(link) + MOVW R(arg+2), R(arg+0) + RET diff --git a/sys/src/ape/lib/ap/arm/strchr.s b/sys/src/ape/lib/ap/arm/strchr.s new file mode 100755 index 000000000..349b5a49f --- /dev/null +++ b/sys/src/ape/lib/ap/arm/strchr.s @@ -0,0 +1,56 @@ +TEXT strchr(SB), $-4 + MOVBU c+4(FP), R1 + CMP $0, R1 
+ BEQ _null + +_strchr: /* not looking for a null, byte at a time */ + MOVBU.P 1(R0), R2 + CMP R1, R2 + BEQ _sub1 + + CMP $0, R2 + BNE _strchr + +_return0: /* character not found in string, return 0 */ + MOVW $0, R0 + RET + +_null: /* looking for null, align */ + AND.S $3, R0, R2 + BEQ _aligned + + MOVBU.P 1(R0), R4 + CMP $0, R4 + BEQ _sub1 + B _null + +_aligned: + MOVW $0xFF, R3 /* mask */ + +_loop: + MOVW.P 4(R0), R4 /* 4 at a time */ + TST R4, R3 /* AND.S R2, R3, Rx */ + TST.NE R4>>8, R3 + TST.NE R4>>16, R3 + TST.NE R4>>24, R3 + BNE _loop + + TST R4, R3 /* its somewhere, find it and correct */ + BEQ _sub4 + TST R4>>8, R3 + BEQ _sub3 + TST R4>>16, R3 + BEQ _sub2 + +_sub1: /* compensate for pointer increment */ + SUB $1, R0 + RET +_sub2: + SUB $2, R0 + RET +_sub3: + SUB $3, R0 + RET +_sub4: + SUB $4, R0 + RET diff --git a/sys/src/ape/lib/ap/arm/strcmp.s b/sys/src/ape/lib/ap/arm/strcmp.s new file mode 100755 index 000000000..015e51596 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/strcmp.s @@ -0,0 +1,67 @@ +TEXT strcmp(SB), $-4 + MOVW R0, R1 + MOVW s2+4(FP), R2 + + MOVW $0xFF, R3 /* mask */ + +_align: /* align s1 on 4 */ + TST $3, R1 + BEQ _aligned + + MOVBU.P 1(R1), R4 /* implicit write back */ + MOVBU.P 1(R2), R8 /* implicit write back */ + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + B _align + +_aligned: /* is s2 now aligned? */ + TST $3, R2 + BNE _unaligned + +_aloop: + MOVW.P 4(R1), R5 /* 4 at a time */ + MOVW.P 4(R2), R7 + + AND R5, R3, R4 + AND R7, R3, R8 + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + + AND R5>>8, R3, R4 + AND R7>>8, R3, R8 + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + + AND R5>>16, R3, R4 + AND R7>>16, R3, R8 + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + + AND R5>>24, R3, R4 + AND R7>>24, R3, R8 + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + + B _aloop + +_return: + RET + +_unaligned: + MOVBU.P 1(R1), R4 /* implicit write back */ + MOVBU.P 1(R2), R8 /* implicit write back */ + SUB.S R8, R4, R0 + BNE _return + CMP $0, R4 + BEQ _return + B _unaligned diff --git a/sys/src/ape/lib/ap/arm/strcpy.s b/sys/src/ape/lib/ap/arm/strcpy.s new file mode 100755 index 000000000..3e69fdc7d --- /dev/null +++ b/sys/src/ape/lib/ap/arm/strcpy.s @@ -0,0 +1,46 @@ +TEXT strcpy(SB), $-4 + MOVW R0, to+0(FP) /* need to save for return value */ + MOVW from+4(FP), R1 + MOVW $0xFF, R2 /* mask */ + +salign: /* align source on 4 */ + AND.S $3, R1, R3 + BEQ dalign + MOVBU.P 1(R1), R3 /* implicit write back */ + TST R3, R2 + MOVBU.P R3, 1(R0) /* implicit write back */ + BNE salign + B return + +dalign: /* is destination now aligned? 
*/ + AND.S $3, R0, R3 + BNE uloop + +aloop: + MOVW.P 4(R1), R4 /* read 4, write 4 */ + TST R4, R2 /* AND.S R3, R2, Rx */ + TST.NE R4>>8, R2 + TST.NE R4>>16, R2 + TST.NE R4>>24, R2 + BEQ tail + MOVW.P R4, 4(R0) + B aloop + +uloop: + MOVW.P 4(R1), R4 /* read 4, write 1,1,1,1 */ + +tail: + AND.S R4, R2, R3 + MOVBU.NE.P R3, 1(R0) + AND.NE.S R4>>8, R2, R3 + MOVBU.NE.P R3, 1(R0) + AND.NE.S R4>>16, R2, R3 + MOVBU.NE.P R3, 1(R0) + AND.NE.S R4>>24, R2, R3 + MOVBU.P R3, 1(R0) + BNE uloop + B return + +return: + MOVW to+0(FP), R0 + RET diff --git a/sys/src/ape/lib/ap/arm/tas.s b/sys/src/ape/lib/ap/arm/tas.s new file mode 100755 index 000000000..f1269209e --- /dev/null +++ b/sys/src/ape/lib/ap/arm/tas.s @@ -0,0 +1,5 @@ +TEXT tas(SB), $-4 + MOVW R0,R1 + MOVW $1,R0 + SWPW R0,(R1) + RET diff --git a/sys/src/ape/lib/ap/arm/vlop.s b/sys/src/ape/lib/ap/arm/vlop.s new file mode 100755 index 000000000..3a5375541 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/vlop.s @@ -0,0 +1,13 @@ +TEXT _mulv(SB), $0 + MOVW 4(FP),R8 /* l0 */ + MOVW 8(FP),R11 /* h0 */ + MOVW 12(FP),R4 /* l1 */ + MOVW 16(FP),R5 /* h1 */ + MULLU R8,R4,(R6, R7) /* l0*l1 */ + MUL R8,R5,R5 /* l0*h1 */ + MUL R11,R4,R4 /* h0*l1 */ + ADD R4,R6 + ADD R5,R6 + MOVW R6,4(R0) + MOVW R7,0(R0) + RET diff --git a/sys/src/ape/lib/ap/arm/vlrt.c b/sys/src/ape/lib/ap/arm/vlrt.c new file mode 100755 index 000000000..5e9524d34 --- /dev/null +++ b/sys/src/ape/lib/ap/arm/vlrt.c @@ -0,0 +1,708 @@ +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef signed char schar; + +#define SIGN(n) (1UL<<(n-1)) + +typedef struct Vlong Vlong; +struct Vlong +{ + ulong lo; + ulong hi; +}; + +void abort(void); + +/* needed by profiler; can't be profiled */ +#pragma profile off + +void +_addv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo + b.lo; + hi = a.hi + b.hi; + if(lo < a.lo) + hi++; + r->lo = lo; + r->hi = hi; +} + +void +_subv(Vlong *r, Vlong a, Vlong b) +{ + ulong lo, hi; + + lo = a.lo - b.lo; + hi = a.hi - b.hi; + if(lo > a.lo) + hi--; + r->lo = lo; + r->hi = hi; +} + +#pragma profile on + +void +_d2v(Vlong *y, double d) +{ + union { double d; struct Vlong; } x; + ulong xhi, xlo, ylo, yhi; + int sh; + + x.d = d; + + xhi = (x.hi & 0xfffff) | 0x100000; + xlo = x.lo; + sh = 1075 - ((x.hi >> 20) & 0x7ff); + + ylo = 0; + yhi = 0; + if(sh >= 0) { + /* v = (hi||lo) >> sh */ + if(sh < 32) { + if(sh == 0) { + ylo = xlo; + yhi = xhi; + } else { + ylo = (xlo >> sh) | (xhi << (32-sh)); + yhi = xhi >> sh; + } + } else { + if(sh == 32) { + ylo = xhi; + } else + if(sh < 64) { + ylo = xhi >> (sh-32); + } + } + } else { + /* v = (hi||lo) << -sh */ + sh = -sh; + if(sh <= 10) { + ylo = xlo << sh; + yhi = (xhi << sh) | (xlo >> (32-sh)); + } else { + /* overflow */ + yhi = d; /* causes something awful */ + } + } + if(x.hi & SIGN(32)) { + if(ylo != 0) { + ylo = -ylo; + yhi = ~yhi; + } else + yhi = -yhi; + } + + y->hi = yhi; + y->lo = ylo; +} + +void +_f2v(Vlong *y, float f) +{ + _d2v(y, f); +} + +double +_v2d(Vlong x) +{ + if(x.hi & SIGN(32)) { + if(x.lo) { + x.lo = -x.lo; + x.hi = ~x.hi; + } else + x.hi = -x.hi; + return -((long)x.hi*4294967296. + x.lo); + } + return (long)x.hi*4294967296. 
+ x.lo; +} + +float +_v2f(Vlong x) +{ + return _v2d(x); +} + + +static void +dodiv(Vlong num, Vlong den, Vlong *q, Vlong *r) +{ + ulong numlo, numhi, denhi, denlo, quohi, quolo, t; + int i; + + numhi = num.hi; + numlo = num.lo; + denhi = den.hi; + denlo = den.lo; + /* + * get a divide by zero + */ + if(denlo==0 && denhi==0) { + numlo = numlo / denlo; + } + + /* + * set up the divisor and find the number of iterations needed + */ + if(numhi >= SIGN(32)) { + quohi = SIGN(32); + quolo = 0; + } else { + quohi = numhi; + quolo = numlo; + } + i = 0; + while(denhi < quohi || (denhi == quohi && denlo < quolo)) { + denhi = (denhi<<1) | (denlo>>31); + denlo <<= 1; + i++; + } + + quohi = 0; + quolo = 0; + for(; i >= 0; i--) { + quohi = (quohi<<1) | (quolo>>31); + quolo <<= 1; + if(numhi > denhi || (numhi == denhi && numlo >= denlo)) { + t = numlo; + numlo -= denlo; + if(numlo > t) + numhi--; + numhi -= denhi; + quolo |= 1; + } + denlo = (denlo>>1) | (denhi<<31); + denhi >>= 1; + } + + if(q) { + q->lo = quolo; + q->hi = quohi; + } + if(r) { + r->lo = numlo; + r->hi = numhi; + } +} + +void +_divvu(Vlong *q, Vlong n, Vlong d) +{ + if(n.hi == 0 && d.hi == 0) { + q->hi = 0; + q->lo = n.lo / d.lo; + return; + } + dodiv(n, d, q, 0); +} + +void +_modvu(Vlong *r, Vlong n, Vlong d) +{ + + if(n.hi == 0 && d.hi == 0) { + r->hi = 0; + r->lo = n.lo % d.lo; + return; + } + dodiv(n, d, 0, r); +} + +static void +vneg(Vlong *v) +{ + + if(v->lo == 0) { + v->hi = -v->hi; + return; + } + v->lo = -v->lo; + v->hi = ~v->hi; +} + +void +_divv(Vlong *q, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + q->lo = (long)n.lo / (long)d.lo; + q->hi = ((long)q->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, q, 0); + if(nneg != dneg) + vneg(q); +} + +void +_modv(Vlong *r, Vlong n, Vlong d) +{ + long nneg, dneg; + + if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) { + r->lo = (long)n.lo % (long)d.lo; + r->hi = ((long)r->lo) >> 31; + return; + } + nneg = n.hi >> 31; + if(nneg) + vneg(&n); + dneg = d.hi >> 31; + if(dneg) + vneg(&d); + dodiv(n, d, 0, r); + if(nneg) + vneg(r); +} + +void +_rshav(Vlong *r, Vlong a, int b) +{ + long t; + + t = a.hi; + if(b >= 32) { + r->hi = t>>31; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = t>>31; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_rshlv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.hi; + if(b >= 32) { + r->hi = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->lo = 0; + return; + } + r->lo = t >> (b-32); + return; + } + if(b <= 0) { + r->hi = t; + r->lo = a.lo; + return; + } + r->hi = t >> b; + r->lo = (t << (32-b)) | (a.lo >> b); +} + +void +_lshv(Vlong *r, Vlong a, int b) +{ + ulong t; + + t = a.lo; + if(b >= 32) { + r->lo = 0; + if(b >= 64) { + /* this is illegal re C standard */ + r->hi = 0; + return; + } + r->hi = t << (b-32); + return; + } + if(b <= 0) { + r->lo = t; + r->hi = a.hi; + return; + } + r->lo = t << b; + r->hi = (t >> (32-b)) | (a.hi << b); +} + +void +_andv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi & b.hi; + r->lo = a.lo & b.lo; +} + +void +_orv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi | b.hi; + r->lo = a.lo | b.lo; +} + +void +_xorv(Vlong *r, Vlong a, Vlong b) +{ + r->hi = a.hi ^ b.hi; + r->lo = a.lo ^ b.lo; +} + +void +_vpp(Vlong *l, Vlong *r) +{ + + 
l->hi = r->hi; + l->lo = r->lo; + r->lo++; + if(r->lo == 0) + r->hi++; +} + +void +_vmm(Vlong *l, Vlong *r) +{ + + l->hi = r->hi; + l->lo = r->lo; + if(r->lo == 0) + r->hi--; + r->lo--; +} + +void +_ppv(Vlong *l, Vlong *r) +{ + + r->lo++; + if(r->lo == 0) + r->hi++; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_mmv(Vlong *l, Vlong *r) +{ + + if(r->lo == 0) + r->hi--; + r->lo--; + l->hi = r->hi; + l->lo = r->lo; +} + +void +_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv) +{ + Vlong t, u; + + u = *ret; + switch(type) { + default: + abort(); + break; + + case 1: /* schar */ + t.lo = *(schar*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(schar*)lv = u.lo; + break; + + case 2: /* uchar */ + t.lo = *(uchar*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uchar*)lv = u.lo; + break; + + case 3: /* short */ + t.lo = *(short*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(short*)lv = u.lo; + break; + + case 4: /* ushort */ + t.lo = *(ushort*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ushort*)lv = u.lo; + break; + + case 9: /* int */ + t.lo = *(int*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(int*)lv = u.lo; + break; + + case 10: /* uint */ + t.lo = *(uint*)lv; + t.hi = 0; + fn(&u, t, rv); + *(uint*)lv = u.lo; + break; + + case 5: /* long */ + t.lo = *(long*)lv; + t.hi = t.lo >> 31; + fn(&u, t, rv); + *(long*)lv = u.lo; + break; + + case 6: /* ulong */ + t.lo = *(ulong*)lv; + t.hi = 0; + fn(&u, t, rv); + *(ulong*)lv = u.lo; + break; + + case 7: /* vlong */ + case 8: /* uvlong */ + fn(&u, *(Vlong*)lv, rv); + *(Vlong*)lv = u; + break; + } + *ret = u; +} + +void +_p2v(Vlong *ret, void *p) +{ + long t; + + t = (ulong)p; + ret->lo = t; + ret->hi = 0; +} + +void +_sl2v(Vlong *ret, long sl) +{ + long t; + + t = sl; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ul2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul; + ret->lo = t; + ret->hi = 0; +} + +void +_si2v(Vlong *ret, int si) +{ + long t; + + t = si; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_ui2v(Vlong *ret, uint ui) +{ + long t; + + t = ui; + ret->lo = t; + ret->hi = 0; +} + +void +_sh2v(Vlong *ret, long sh) +{ + long t; + + t = (sh << 16) >> 16; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uh2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xffff; + ret->lo = t; + ret->hi = 0; +} + +void +_sc2v(Vlong *ret, long uc) +{ + long t; + + t = (uc << 24) >> 24; + ret->lo = t; + ret->hi = t >> 31; +} + +void +_uc2v(Vlong *ret, ulong ul) +{ + long t; + + t = ul & 0xff; + ret->lo = t; + ret->hi = 0; +} + +long +_v2sc(Vlong rv) +{ + long t; + + t = rv.lo & 0xff; + return (t << 24) >> 24; +} + +long +_v2uc(Vlong rv) +{ + + return rv.lo & 0xff; +} + +long +_v2sh(Vlong rv) +{ + long t; + + t = rv.lo & 0xffff; + return (t << 16) >> 16; +} + +long +_v2uh(Vlong rv) +{ + + return rv.lo & 0xffff; +} + +long +_v2sl(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ul(Vlong rv) +{ + + return rv.lo; +} + +long +_v2si(Vlong rv) +{ + + return rv.lo; +} + +long +_v2ui(Vlong rv) +{ + + return rv.lo; +} + +int +_testv(Vlong rv) +{ + return rv.lo || rv.hi; +} + +int +_eqv(Vlong lv, Vlong rv) +{ + return lv.lo == rv.lo && lv.hi == rv.hi; +} + +int +_nev(Vlong lv, Vlong rv) +{ + return lv.lo != rv.lo || lv.hi != rv.hi; +} + +int +_ltv(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lev(Vlong lv, Vlong rv) +{ + return (long)lv.hi < (long)rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_gtv(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); 
+} + +int +_gev(Vlong lv, Vlong rv) +{ + return (long)lv.hi > (long)rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} + +int +_lov(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo < rv.lo); +} + +int +_lsv(Vlong lv, Vlong rv) +{ + return lv.hi < rv.hi || + (lv.hi == rv.hi && lv.lo <= rv.lo); +} + +int +_hiv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo > rv.lo); +} + +int +_hsv(Vlong lv, Vlong rv) +{ + return lv.hi > rv.hi || + (lv.hi == rv.hi && lv.lo >= rv.lo); +} |
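For reference, _mulv in vlop.s assembles the low 64 bits of a 64x64 product from 32-bit halves: the full MULLU product of the two low words, plus both cross products added into the high word (the h0*h1 term only affects bits 64 and up, so it is dropped). A standalone sketch of that identity in portable C, assuming only a hosted compiler with <stdint.h>; the names mul64, a, and b are illustrative and not taken from the source:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* low 64 bits of a*b, built from 32-bit halves the way _mulv does */
static uint64_t
mul64(uint64_t a, uint64_t b)
{
	uint32_t l0 = (uint32_t)a, h0 = (uint32_t)(a >> 32);
	uint32_t l1 = (uint32_t)b, h1 = (uint32_t)(b >> 32);

	uint64_t low = (uint64_t)l0 * l1;	/* MULLU: full 64-bit l0*l1 */
	uint32_t cross = l0*h1 + h0*l1;		/* MUL twice + ADDs, kept mod 2^32 */

	return low + ((uint64_t)cross << 32);	/* h0*h1 only reaches bits >= 64 */
}

int
main(void)
{
	uint64_t a = 0x123456789abcdef0ULL, b = 0x0fedcba987654321ULL;

	assert(mul64(a, b) == a * b);		/* agrees with the native multiply mod 2^64 */
	printf("%llx\n", (unsigned long long)mul64(a, b));
	return 0;
}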