From f3f93925173d15ca48e90ce1624452d7e3b7726f Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Sun, 29 Oct 2017 23:09:54 +0100 Subject: =?UTF-8?q?kernel:=20introduce=20devswap=20#=C2=B6=20to=20serve=20?= =?UTF-8?q?/dev/swap=20and=20handle=20swapfile=20encryption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sys/src/9/bcm/main.c | 1 - sys/src/9/bcm/mkfile | 1 - sys/src/9/bcm/picpuf | 1 + sys/src/9/bcm/pif | 1 + sys/src/9/boot/bootrc | 2 +- sys/src/9/kw/main.c | 1 - sys/src/9/kw/mkfile | 1 - sys/src/9/kw/plug | 1 + sys/src/9/mtx/main.c | 1 - sys/src/9/mtx/mkfile | 1 - sys/src/9/mtx/mtx | 1 + sys/src/9/mtx/mtxcpu | 1 + sys/src/9/omap/beagle | 1 + sys/src/9/omap/main.c | 1 - sys/src/9/omap/mkfile | 1 - sys/src/9/pc/main.c | 1 - sys/src/9/pc/mkfile | 1 - sys/src/9/pc/pc | 1 + sys/src/9/pc64/main.c | 1 - sys/src/9/pc64/mkfile | 1 - sys/src/9/pc64/pc64 | 1 + sys/src/9/port/devcons.c | 54 +--- sys/src/9/port/devswap.c | 612 ++++++++++++++++++++++++++++++++++++++++++++++ sys/src/9/port/portfns.h | 2 - sys/src/9/port/portmkfile | 12 +- sys/src/9/port/swap.c | 430 -------------------------------- sys/src/9/ppc/blast | 1 + sys/src/9/ppc/main.c | 1 - sys/src/9/ppc/mkfile | 1 - sys/src/9/sgi/indy | 1 + sys/src/9/sgi/main.c | 2 - sys/src/9/sgi/mkfile | 1 - sys/src/9/teg2/main.c | 1 - sys/src/9/teg2/mkfile | 1 - sys/src/9/teg2/ts | 1 + sys/src/9/xen/main.c | 2 - sys/src/9/xen/mkfile | 1 - sys/src/9/xen/xenpcf | 1 + sys/src/9/zynq/main.c | 1 - sys/src/9/zynq/mkfile | 1 - sys/src/9/zynq/zynq | 1 + 41 files changed, 634 insertions(+), 515 deletions(-) create mode 100644 sys/src/9/port/devswap.c delete mode 100644 sys/src/9/port/swap.c (limited to 'sys/src') diff --git a/sys/src/9/bcm/main.c b/sys/src/9/bcm/main.c index 69fb61bb1..4167f2b6f 100644 --- a/sys/src/9/bcm/main.c +++ b/sys/src/9/bcm/main.c @@ -270,7 +270,6 @@ main(void) links(); chandevreset(); /* most devices are discovered here */ pageinit(); - swapinit(); userinit(); 
gpiomeminit(); schedinit(); diff --git a/sys/src/9/bcm/mkfile b/sys/src/9/bcm/mkfile index 41b5438ef..e2bd4c170 100644 --- a/sys/src/9/bcm/mkfile +++ b/sys/src/9/bcm/mkfile @@ -33,7 +33,6 @@ PORT=\ rdb.$O\ rebootcmd.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/bcm/picpuf b/sys/src/9/bcm/picpuf index 70cd23eff..b2c5ab377 100644 --- a/sys/src/9/bcm/picpuf +++ b/sys/src/9/bcm/picpuf @@ -1,6 +1,7 @@ dev root cons + swap env pipe proc diff --git a/sys/src/9/bcm/pif b/sys/src/9/bcm/pif index fc9c040c5..45f4c4394 100644 --- a/sys/src/9/bcm/pif +++ b/sys/src/9/bcm/pif @@ -1,6 +1,7 @@ dev root cons + swap env pipe proc diff --git a/sys/src/9/boot/bootrc b/sys/src/9/boot/bootrc index cf899fb9b..f5f6fe5a6 100755 --- a/sys/src/9/boot/bootrc +++ b/sys/src/9/boot/bootrc @@ -10,7 +10,7 @@ unmount /root bind -q '#d' /fd bind -q '#p' /proc -for(i in S f k æ t b m) +for(i in ¶ P S f k æ t b m) bind -qa '#'^$i /dev # bind in an ip interface diff --git a/sys/src/9/kw/main.c b/sys/src/9/kw/main.c index f2bc302db..ff9a7ee34 100644 --- a/sys/src/9/kw/main.c +++ b/sys/src/9/kw/main.c @@ -322,7 +322,6 @@ wave(' '); chandevreset(); /* most devices are discovered here */ pageinit(); - swapinit(); userinit(); schedinit(); panic("schedinit returned"); diff --git a/sys/src/9/kw/mkfile b/sys/src/9/kw/mkfile index 219d06ffa..8134b3bb2 100644 --- a/sys/src/9/kw/mkfile +++ b/sys/src/9/kw/mkfile @@ -32,7 +32,6 @@ PORT=\ qio.$O\ qlock.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/kw/plug b/sys/src/9/kw/plug index 281d6837f..1d0dc9471 100644 --- a/sys/src/9/kw/plug +++ b/sys/src/9/kw/plug @@ -3,6 +3,7 @@ dev root cons + swap env pipe proc diff --git a/sys/src/9/mtx/main.c b/sys/src/9/mtx/main.c index 564a197d7..0b1f3401c 100644 --- a/sys/src/9/mtx/main.c +++ b/sys/src/9/mtx/main.c @@ -35,7 +35,6 @@ main(void) links(); chandevreset(); pageinit(); - swapinit(); fpsave(&initfp); initfp.fpscr = 0; userinit(); diff --git 
a/sys/src/9/mtx/mkfile b/sys/src/9/mtx/mkfile index f1efadf6a..daa4b3e81 100644 --- a/sys/src/9/mtx/mkfile +++ b/sys/src/9/mtx/mkfile @@ -30,7 +30,6 @@ PORT=\ qlock.$O\ rdb.$O\ segment.$O\ - swap.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\ diff --git a/sys/src/9/mtx/mtx b/sys/src/9/mtx/mtx index 01dcd101d..5330459f5 100644 --- a/sys/src/9/mtx/mtx +++ b/sys/src/9/mtx/mtx @@ -1,6 +1,7 @@ dev root cons + swap arch pnp pci env diff --git a/sys/src/9/mtx/mtxcpu b/sys/src/9/mtx/mtxcpu index c95629847..3e08dd763 100644 --- a/sys/src/9/mtx/mtxcpu +++ b/sys/src/9/mtx/mtxcpu @@ -1,6 +1,7 @@ dev root cons + swap arch pnp pci env diff --git a/sys/src/9/omap/beagle b/sys/src/9/omap/beagle index 5306731b0..a29262277 100644 --- a/sys/src/9/omap/beagle +++ b/sys/src/9/omap/beagle @@ -2,6 +2,7 @@ dev root cons + swap env pipe proc diff --git a/sys/src/9/omap/main.c b/sys/src/9/omap/main.c index 6ff256f34..598a3ac50 100644 --- a/sys/src/9/omap/main.c +++ b/sys/src/9/omap/main.c @@ -276,7 +276,6 @@ wave('l'); // i8250console(); /* too early; see init0 */ pageinit(); - swapinit(); userinit(); schedinit(); } diff --git a/sys/src/9/omap/mkfile b/sys/src/9/omap/mkfile index 1b6fdef81..c5b1f94c1 100644 --- a/sys/src/9/omap/mkfile +++ b/sys/src/9/omap/mkfile @@ -33,7 +33,6 @@ PORT=\ qio.$O\ qlock.$O\ segment.$O\ - swap.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\ diff --git a/sys/src/9/pc/main.c b/sys/src/9/pc/main.c index 824259e3a..5408e6982 100644 --- a/sys/src/9/pc/main.c +++ b/sys/src/9/pc/main.c @@ -62,7 +62,6 @@ main(void) chandevreset(); netconsole(); pageinit(); - swapinit(); userinit(); schedinit(); } diff --git a/sys/src/9/pc/mkfile b/sys/src/9/pc/mkfile index 7aac934cc..63044be64 100644 --- a/sys/src/9/pc/mkfile +++ b/sys/src/9/pc/mkfile @@ -35,7 +35,6 @@ PORT=\ rdb.$O\ rebootcmd.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/pc/pc b/sys/src/9/pc/pc index 665b6dd81..47e9da2c3 100644 --- a/sys/src/9/pc/pc +++ b/sys/src/9/pc/pc @@ -2,6 +2,7 @@ dev 
root cons + swap arch pnp pci env diff --git a/sys/src/9/pc64/main.c b/sys/src/9/pc64/main.c index 48e45a0d1..cc85fe76e 100644 --- a/sys/src/9/pc64/main.c +++ b/sys/src/9/pc64/main.c @@ -332,7 +332,6 @@ main() netconsole(); preallocpages(); pageinit(); - swapinit(); userinit(); schedinit(); } diff --git a/sys/src/9/pc64/mkfile b/sys/src/9/pc64/mkfile index d8a436c35..4d145dc27 100644 --- a/sys/src/9/pc64/mkfile +++ b/sys/src/9/pc64/mkfile @@ -33,7 +33,6 @@ PORT=\ rdb.$O\ rebootcmd.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/pc64/pc64 b/sys/src/9/pc64/pc64 index 6ac8b5fab..c347952df 100644 --- a/sys/src/9/pc64/pc64 +++ b/sys/src/9/pc64/pc64 @@ -2,6 +2,7 @@ dev root cons + swap arch pnp pci env diff --git a/sys/src/9/port/devcons.c b/sys/src/9/port/devcons.c index 67e28a83a..b6c2d8788 100644 --- a/sys/src/9/port/devcons.c +++ b/sys/src/9/port/devcons.c @@ -5,7 +5,6 @@ #include "fns.h" #include "../port/error.h" -#include #include void (*consdebug)(void) = nil; @@ -324,7 +323,6 @@ enum{ Qppid, Qrandom, Qreboot, - Qswap, Qsysname, Qsysstat, Qtime, @@ -357,7 +355,6 @@ static Dirtab consdir[]={ "ppid", {Qppid}, NUMSIZE, 0444, "random", {Qrandom}, 0, 0444, "reboot", {Qreboot}, 0, 0664, - "swap", {Qswap}, 0, 0664, "sysname", {Qsysname}, 0, 0664, "sysstat", {Qsysstat}, 0, 0666, "time", {Qtime}, NUMSIZE+3*VLNUMSIZE, 0664, @@ -471,8 +468,6 @@ consread(Chan *c, void *buf, long n, vlong off) int i, k, id; vlong offset = off; extern char configfile[]; - extern Image fscache; - extern Image swapimage; if(n <= 0) return n; @@ -592,33 +587,6 @@ consread(Chan *c, void *buf, long n, vlong off) poperror(); return n; - case Qswap: - snprint(tmp, sizeof tmp, - "%llud memory\n" - "%llud pagesize\n" - "%lud kernel\n" - "%lud/%lud user\n" - "%lud/%lud swap\n" - "%llud/%llud/%llud kernel malloc\n" - "%llud/%llud/%llud kernel draw\n" - "%llud/%llud/%llud kernel secret\n", - (uvlong)conf.npage*BY2PG, - (uvlong)BY2PG, - conf.npage-conf.upages, - 
palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user, - conf.nswap-swapalloc.free, conf.nswap, - (uvlong)mainmem->curalloc, - (uvlong)mainmem->cursize, - (uvlong)mainmem->maxsize, - (uvlong)imagmem->curalloc, - (uvlong)imagmem->cursize, - (uvlong)imagmem->maxsize, - (uvlong)secrmem->curalloc, - (uvlong)secrmem->cursize, - (uvlong)secrmem->maxsize); - - return readstr((ulong)offset, buf, n, tmp); - case Qsysname: if(sysname == nil) return 0; @@ -669,8 +637,7 @@ conswrite(Chan *c, void *va, long n, vlong off) long l, bp; char *a; Mach *mp; - int id, fd; - Chan *swc; + int id; ulong offset; Cmdbuf *cb; Cmdtab *ct; @@ -765,25 +732,6 @@ conswrite(Chan *c, void *va, long n, vlong off) } break; - case Qswap: - if(n >= sizeof buf) - error(Egreg); - memmove(buf, va, n); /* so we can NUL-terminate */ - buf[n] = 0; - /* start a pager if not already started */ - if(strncmp(buf, "start", 5) == 0){ - kickpager(); - break; - } - if(!iseve()) - error(Eperm); - if(buf[0]<'0' || '9' +#include + +static int canflush(Proc*, Segment*); +static void executeio(void); +static void pageout(Proc*, Segment*); +static void pagepte(int, Page**); +static void pager(void*); + +Image swapimage = { + .notext = 1, +}; + +static Chan *swapchan; +static uchar *swapbuf; +static AESstate *swapkey; + +static Page **iolist; +static int ioptr; + +static ushort ageclock; + +static void +swapinit(void) +{ + swapalloc.swmap = xalloc(conf.nswap); + swapalloc.top = &swapalloc.swmap[conf.nswap]; + swapalloc.alloc = swapalloc.swmap; + swapalloc.last = swapalloc.swmap; + swapalloc.free = conf.nswap; + swapalloc.xref = 0; + + iolist = xalloc(conf.nswppo*sizeof(Page*)); + if(swapalloc.swmap == nil || iolist == nil) + panic("swapinit: not enough memory"); +} + +static uintptr +newswap(void) +{ + uchar *look; + + lock(&swapalloc); + if(swapalloc.free == 0) { + unlock(&swapalloc); + return ~0; + } + look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last); + if(look == nil) + look = 
memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap); + *look = 2; /* ref for pte + io transaction */ + swapalloc.last = look; + swapalloc.free--; + unlock(&swapalloc); + return (look-swapalloc.swmap) * BY2PG; +} + +void +putswap(Page *p) +{ + uchar *idx; + + lock(&swapalloc); + idx = &swapalloc.swmap[((uintptr)p)/BY2PG]; + if(*idx == 0) + panic("putswap %#p ref == 0", p); + + if(*idx == 255) { + if(swapalloc.xref == 0) + panic("putswap %#p xref == 0", p); + + if(--swapalloc.xref == 0) { + for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) { + if(*idx == 255) { + *idx = 0; + swapalloc.free++; + } + } + } + } else { + if(--(*idx) == 0) + swapalloc.free++; + } + unlock(&swapalloc); +} + +void +dupswap(Page *p) +{ + uchar *idx; + + lock(&swapalloc); + idx = &swapalloc.swmap[((uintptr)p)/BY2PG]; + if(*idx == 255) + swapalloc.xref++; + else { + if(++(*idx) == 255) + swapalloc.xref += 255; + } + unlock(&swapalloc); +} + +int +swapcount(uintptr daddr) +{ + return swapalloc.swmap[daddr/BY2PG]; +} + +void +kickpager(void) +{ + static Ref started; + + if(started.ref || incref(&started) != 1) + wakeup(&swapalloc.r); + else + kproc("pager", pager, 0); +} + +static int +reclaim(void) +{ + ulong np; + + for(;;){ + if((np = pagereclaim(&fscache, 1000)) > 0) { + if(0) print("reclaim: %lud fscache\n", np); + } else if((np = pagereclaim(&swapimage, 1000)) > 0) { + if(0) print("reclaim: %lud swap\n", np); + } else if((np = imagereclaim(1000)) > 0) { + if(0) print("reclaim: %lud image\n", np); + } + if(!needpages(nil)) + return 1; /* have pages, done */ + if(np == 0) + return 0; /* didnt reclaim, need to swap */ + sched(); + } +} + +static void +pager(void*) +{ + int i; + Segment *s; + Proc *p, *ep; + + p = proctab(0); + ep = &p[conf.nproc]; + + while(waserror()) + ; + + for(;;){ + up->psstate = "Reclaim"; + if(reclaim()){ + up->psstate = "Idle"; + wakeup(&palloc.pwait[0]); + wakeup(&palloc.pwait[1]); + sleep(&swapalloc.r, needpages, nil); + continue; + } + + if(swapimage.c 
== nil || swapalloc.free == 0){ + Killbig: + if(!freebroken()) + killbig("out of memory"); + sched(); + continue; + } + + i = ageclock; + do { + if(++p >= ep){ + if(++ageclock == i) + goto Killbig; + p = proctab(0); + } + } while(p->state == Dead || p->noswap || !canqlock(&p->seglock)); + up->psstate = "Pageout"; + for(i = 0; i < NSEG; i++) { + if((s = p->seg[i]) != nil) { + switch(s->type&SG_TYPE) { + default: + break; + case SG_TEXT: + pageout(p, s); + break; + case SG_DATA: + case SG_BSS: + case SG_STACK: + case SG_SHARED: + pageout(p, s); + break; + } + } + } + qunlock(&p->seglock); + + if(ioptr > 0) { + up->psstate = "I/O"; + executeio(); + } + } +} + +static void +pageout(Proc *p, Segment *s) +{ + int type, i, size; + short age; + Pte *l; + Page **pg, *entry; + + if(!canqlock(s)) /* We cannot afford to wait, we will surely deadlock */ + return; + + if(!canflush(p, s) /* Able to invalidate all tlbs with references */ + || waserror()) { + qunlock(s); + putseg(s); + return; + } + + /* Pass through the pte tables looking for memory pages to swap out */ + type = s->type&SG_TYPE; + size = s->mapsize; + for(i = 0; i < size; i++) { + l = s->map[i]; + if(l == nil) + continue; + for(pg = l->first; pg <= l->last; pg++) { + entry = *pg; + if(pagedout(entry)) + continue; + if(entry->modref & PG_REF) { + entry->modref &= ~PG_REF; + entry->refage = ageclock; + continue; + } + age = (short)(ageclock - entry->refage); + if(age < 16) + continue; + pagepte(type, pg); + } + } + poperror(); + qunlock(s); + putseg(s); +} + +static int +canflush(Proc *p, Segment *s) +{ + int i; + Proc *ep; + + if(incref(s) == 2) /* Easy if we are the only user */ + return canpage(p); + + /* Now we must do hardwork to ensure all processes which have tlb + * entries for this segment will be flushed if we succeed in paging it out + */ + p = proctab(0); + ep = &p[conf.nproc]; + while(p < ep) { + if(p->state != Dead) { + for(i = 0; i < NSEG; i++) + if(p->seg[i] == s) + if(!canpage(p)) + return 0; + } + 
p++; + } + return 1; +} + +static void +pagepte(int type, Page **pg) +{ + uintptr daddr; + Page *outp; + + outp = *pg; + switch(type) { + case SG_TEXT: /* Revert to demand load */ + putpage(outp); + *pg = nil; + break; + + case SG_DATA: + case SG_BSS: + case SG_STACK: + case SG_SHARED: + if(ioptr >= conf.nswppo) + break; + + /* + * get a new swap address with swapcount 2, one for the pte + * and one extra ref for us while we write the page to disk + */ + daddr = newswap(); + if(daddr == ~0) + break; + + /* clear any pages referring to it from the cache */ + cachedel(&swapimage, daddr); + + /* forget anything that it used to cache */ + uncachepage(outp); + + /* + * enter it into the cache so that a fault happening + * during the write will grab the page from the cache + * rather than one partially written to the disk + */ + outp->daddr = daddr; + cachepage(outp, &swapimage); + *pg = (Page*)(daddr|PG_ONSWAP); + + /* Add page to IO transaction list */ + iolist[ioptr++] = outp; + break; + } +} + +void +pagersummary(void) +{ + print("%lud/%lud memory %lud/%lud swap %d iolist\n", + palloc.user-palloc.freecount, + palloc.user, conf.nswap-swapalloc.free, conf.nswap, + ioptr); +} + +static void +executeio(void) +{ + Page *outp; + int i, n; + Chan *c; + char *kaddr; + KMap *k; + + c = swapimage.c; + for(i = 0; i < ioptr; i++) { + if(ioptr > conf.nswppo) + panic("executeio: ioptr %d > %d", ioptr, conf.nswppo); + outp = iolist[i]; + + assert(outp->ref > 0); + assert(outp->image == &swapimage); + assert(outp->daddr != ~0); + + /* only write when swap address still in use */ + if(swapcount(outp->daddr) > 1){ + k = kmap(outp); + kaddr = (char*)VA(k); + + if(waserror()) + panic("executeio: page outp I/O error"); + + n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr); + if(n != BY2PG) + nexterror(); + + kunmap(k); + poperror(); + } + + /* drop our extra swap reference */ + putswap((Page*)outp->daddr); + + /* Free up the page after I/O */ + putpage(outp); + } + ioptr = 0; +} + 
+int +needpages(void*) +{ + return palloc.freecount < swapalloc.headroom; +} + +static void +setswapchan(Chan *c) +{ + uchar buf[sizeof(Dir)+100]; + Dir d; + int n; + + if(waserror()){ + cclose(c); + nexterror(); + } + if(swapimage.c != nil) { + if(swapalloc.free != conf.nswap) + error(Einuse); + cclose(swapimage.c); + swapimage.c = nil; + } + + /* + * if this isn't a file, set the swap space + * to be at most the size of the partition + */ + if(devtab[c->type]->dc != L'M'){ + n = devtab[c->type]->stat(c, buf, sizeof buf); + if(n <= 0 || convM2D(buf, n, &d, nil) == 0) + error("stat failed in setswapchan"); + if(d.length < conf.nswppo*BY2PG) + error("swap device too small"); + if(d.length < conf.nswap*BY2PG){ + conf.nswap = d.length/BY2PG; + swapalloc.top = &swapalloc.swmap[conf.nswap]; + swapalloc.free = conf.nswap; + } + } + c->flag &= ~CCACHE; + cclunk(c); + poperror(); + + swapchan = c; + swapimage.c = namec("#¶/swapfile", Aopen, ORDWR, 0); +} + +enum { + Qdir, + Qswap, + Qswapfile, +}; + +static Dirtab swapdir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "swap", {Qswap}, 0, 0664, + "swapfile", {Qswapfile}, 0, 0600, +}; + +static Chan* +swapattach(char *spec) +{ + return devattach(L'¶', spec); +} + +static Walkqid* +swapwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, swapdir, nelem(swapdir), devgen); +} + +static int +swapstat(Chan *c, uchar *dp, int n) +{ + return devstat(c, dp, n, swapdir, nelem(swapdir), devgen); +} + +static Chan* +swapopen(Chan *c, int omode) +{ + uchar key[128/8]; + + switch((ulong)c->qid.path){ + case Qswapfile: + if(!iseve() || omode != ORDWR) + error(Eperm); + if(swapimage.c != nil) + error(Einuse); + if(swapchan == nil) + error(Egreg); + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + + swapbuf = mallocalign(BY2PG, BY2PG, 0, 0); + swapkey = secalloc(sizeof(AESstate)*2); + if(swapbuf == nil || swapkey == nil) + error(Enomem); + + genrandom(key, sizeof(key)); + 
setupAESstate(&swapkey[0], key, sizeof(key), nil); + genrandom(key, sizeof(key)); + setupAESstate(&swapkey[1], key, sizeof(key), nil); + memset(key, 0, sizeof(key)); + + return c; + } + return devopen(c, omode, swapdir, nelem(swapdir), devgen); +} + +static void +swapclose(Chan *c) +{ + if((c->flag & COPEN) == 0) + return; + switch((ulong)c->qid.path){ + case Qswapfile: + cclose(swapchan); + swapchan = nil; + secfree(swapkey); + swapkey = nil; + free(swapbuf); + swapbuf = nil; + break; + } +} + +static long +swapread(Chan *c, void *va, long n, vlong off) +{ + char tmp[256]; /* must be >= 18*NUMSIZE (Qswap) */ + + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, va, n, swapdir, nelem(swapdir), devgen); + case Qswap: + snprint(tmp, sizeof tmp, + "%llud memory\n" + "%llud pagesize\n" + "%lud kernel\n" + "%lud/%lud user\n" + "%lud/%lud swap\n" + "%llud/%llud/%llud kernel malloc\n" + "%llud/%llud/%llud kernel draw\n" + "%llud/%llud/%llud kernel secret\n", + (uvlong)conf.npage*BY2PG, + (uvlong)BY2PG, + conf.npage-conf.upages, + palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user, + conf.nswap-swapalloc.free, conf.nswap, + (uvlong)mainmem->curalloc, + (uvlong)mainmem->cursize, + (uvlong)mainmem->maxsize, + (uvlong)imagmem->curalloc, + (uvlong)imagmem->cursize, + (uvlong)imagmem->maxsize, + (uvlong)secrmem->curalloc, + (uvlong)secrmem->cursize, + (uvlong)secrmem->maxsize); + return readstr((ulong)off, va, n, tmp); + case Qswapfile: + if(n != BY2PG) + error(Ebadarg); + if(devtab[swapchan->type]->read(swapchan, va, n, off) != n) + error(Eio); + aes_xts_decrypt(&swapkey[0], &swapkey[1], off, va, va, n); + return n; + } + error(Egreg); + return 0; +} + +static long +swapwrite(Chan *c, void *va, long n, vlong off) +{ + char buf[256]; + + switch((ulong)c->qid.path){ + case Qswap: + if(!iseve()) + error(Eperm); + if(n >= sizeof buf) + error(Egreg); + memmove(buf, va, n); /* so we can NUL-terminate */ + buf[n] = 0; + /* start a pager if not 
already started */ + if(strncmp(buf, "start", 5) == 0) + kickpager(); + else if(buf[0]>='0' && buf[0]<='9') + setswapchan(fdtochan(strtoul(buf, nil, 0), ORDWR, 1, 1)); + else + error(Ebadctl); + return n; + case Qswapfile: + if(n != BY2PG) + error(Ebadarg); + aes_xts_encrypt(&swapkey[0], &swapkey[1], off, va, swapbuf, n); + if(devtab[swapchan->type]->write(swapchan, swapbuf, n, off) != n) + error(Eio); + return n; + } + error(Egreg); + return 0; +} + +Dev swapdevtab = { + L'¶', + "swap", + devreset, + swapinit, + devshutdown, + swapattach, + swapwalk, + swapstat, + swapopen, + devcreate, + swapclose, + swapread, + devbread, + swapwrite, + devbwrite, + devremove, + devwstat, +}; diff --git a/sys/src/9/port/portfns.h b/sys/src/9/port/portfns.h index 803622fd8..c238c818e 100644 --- a/sys/src/9/port/portfns.h +++ b/sys/src/9/port/portfns.h @@ -318,7 +318,6 @@ int setlabel(Label*); void setmalloctag(void*, uintptr); void setrealloctag(void*, uintptr); void setregisters(Ureg*, char*, char*, int); -void setswapchan(Chan*); void setupwatchpts(Proc*, Watchpt*, int); char* skipslash(char*); void sleep(Rendez*, int(*)(void*), void*); @@ -332,7 +331,6 @@ void srvrenameuser(char*, char*); void shrrenameuser(char*, char*); int swapcount(uintptr); int swapfull(void); -void swapinit(void); void syscallfmt(ulong syscallno, uintptr pc, va_list list); void sysretfmt(ulong syscallno, va_list list, uintptr ret, uvlong start, uvlong stop); void timeradd(Timer*); diff --git a/sys/src/9/port/portmkfile b/sys/src/9/port/portmkfile index c16cf9c93..ec437d327 100644 --- a/sys/src/9/port/portmkfile +++ b/sys/src/9/port/portmkfile @@ -62,15 +62,15 @@ errstr.h: ../port/mkerrstr ../port/error.h %.db: main.$O $CC -s$stem main.c | dbfmt > $stem.db -alloc.$O: /sys/include/pool.h +alloc.$O devswap.$O: /sys/include/pool.h devmnt.$O: /sys/include/fcall.h proc.$O proc.acid: errstr.h devroot.$O: errstr.h devaudio.$O: ../port/audioif.h -devaoe.$O: /$objtype/include/ureg.h -devfs.$O: 
/$objtype/include/ureg.h -devsd.$O: /$objtype/include/ureg.h -sdscsi.$O: /$objtype/include/ureg.h +devaoe.$O: ../port/sd.h /$objtype/include/ureg.h +devfs.$O: ../port/sd.h /$objtype/include/ureg.h +devsd.$O: ../port/sd.h /$objtype/include/ureg.h +sdscsi.$O: ../port/sd.h /$objtype/include/ureg.h trap.$O: /$objtype/include/ureg.h devproc.$O: /$objtype/include/ureg.h main.$O: init.h @@ -87,3 +87,5 @@ unthwack.$O: ../port/thwack.h devsdp.$O: ../port/thwack.h devproc.$O sysproc.$O: /sys/include/tos.h devproc.$O edf.$O proc.$O: /sys/include/trace.h +devcons.$O: /sys/include/authsrv.h +devcap.$O devfs.$O devsdp.$O devssl.$O devtls.$O devswap.$O random.$O: /sys/include/libsec.h diff --git a/sys/src/9/port/swap.c b/sys/src/9/port/swap.c deleted file mode 100644 index 81d3bf26b..000000000 --- a/sys/src/9/port/swap.c +++ /dev/null @@ -1,430 +0,0 @@ -#include "u.h" -#include "../port/lib.h" -#include "mem.h" -#include "dat.h" -#include "fns.h" -#include "../port/error.h" - -static int canflush(Proc*, Segment*); -static void executeio(void); -static void pageout(Proc*, Segment*); -static void pagepte(int, Page**); -static void pager(void*); - -Image swapimage; - -static int swopen; -static Page **iolist; -static int ioptr; - -static ushort ageclock; - -void -swapinit(void) -{ - swapalloc.swmap = xalloc(conf.nswap); - swapalloc.top = &swapalloc.swmap[conf.nswap]; - swapalloc.alloc = swapalloc.swmap; - swapalloc.last = swapalloc.swmap; - swapalloc.free = conf.nswap; - swapalloc.xref = 0; - - iolist = xalloc(conf.nswppo*sizeof(Page*)); - if(swapalloc.swmap == 0 || iolist == 0) - panic("swapinit: not enough memory"); - - swapimage.notext = 1; -} - -static uintptr -newswap(void) -{ - uchar *look; - - lock(&swapalloc); - if(swapalloc.free == 0) { - unlock(&swapalloc); - return ~0; - } - look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last); - if(look == nil) - look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap); - *look = 2; /* ref for pte + io transaction */ - 
swapalloc.last = look; - swapalloc.free--; - unlock(&swapalloc); - return (look-swapalloc.swmap) * BY2PG; -} - -void -putswap(Page *p) -{ - uchar *idx; - - lock(&swapalloc); - idx = &swapalloc.swmap[((uintptr)p)/BY2PG]; - if(*idx == 0) - panic("putswap %#p ref == 0", p); - - if(*idx == 255) { - if(swapalloc.xref == 0) - panic("putswap %#p xref == 0", p); - - if(--swapalloc.xref == 0) { - for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) { - if(*idx == 255) { - *idx = 0; - swapalloc.free++; - } - } - } - } else { - if(--(*idx) == 0) - swapalloc.free++; - } - unlock(&swapalloc); -} - -void -dupswap(Page *p) -{ - uchar *idx; - - lock(&swapalloc); - idx = &swapalloc.swmap[((uintptr)p)/BY2PG]; - if(*idx == 255) - swapalloc.xref++; - else { - if(++(*idx) == 255) - swapalloc.xref += 255; - } - unlock(&swapalloc); -} - -int -swapcount(uintptr daddr) -{ - return swapalloc.swmap[daddr/BY2PG]; -} - -void -kickpager(void) -{ - static Ref started; - - if(started.ref || incref(&started) != 1) - wakeup(&swapalloc.r); - else - kproc("pager", pager, 0); -} - -static int -reclaim(void) -{ - ulong np; - - for(;;){ - if((np = pagereclaim(&fscache, 1000)) > 0) { - if(0) print("reclaim: %lud fscache\n", np); - } else if((np = pagereclaim(&swapimage, 1000)) > 0) { - if(0) print("reclaim: %lud swap\n", np); - } else if((np = imagereclaim(1000)) > 0) { - if(0) print("reclaim: %lud image\n", np); - } - if(!needpages(nil)) - return 1; /* have pages, done */ - if(np == 0) - return 0; /* didnt reclaim, need to swap */ - sched(); - } -} - -static void -pager(void*) -{ - int i; - Segment *s; - Proc *p, *ep; - - p = proctab(0); - ep = &p[conf.nproc]; - - while(waserror()) - ; - - for(;;){ - up->psstate = "Reclaim"; - if(reclaim()){ - up->psstate = "Idle"; - wakeup(&palloc.pwait[0]); - wakeup(&palloc.pwait[1]); - sleep(&swapalloc.r, needpages, nil); - continue; - } - - if(swapimage.c == nil || swapalloc.free == 0){ - Killbig: - if(!freebroken()) - killbig("out of memory"); - sched(); - 
continue; - } - - i = ageclock; - do { - if(++p >= ep){ - if(++ageclock == i) - goto Killbig; - p = proctab(0); - } - } while(p->state == Dead || p->noswap || !canqlock(&p->seglock)); - up->psstate = "Pageout"; - for(i = 0; i < NSEG; i++) { - if((s = p->seg[i]) != nil) { - switch(s->type&SG_TYPE) { - default: - break; - case SG_TEXT: - pageout(p, s); - break; - case SG_DATA: - case SG_BSS: - case SG_STACK: - case SG_SHARED: - pageout(p, s); - break; - } - } - } - qunlock(&p->seglock); - - if(ioptr > 0) { - up->psstate = "I/O"; - executeio(); - } - } -} - -static void -pageout(Proc *p, Segment *s) -{ - int type, i, size; - short age; - Pte *l; - Page **pg, *entry; - - if(!canqlock(s)) /* We cannot afford to wait, we will surely deadlock */ - return; - - if(!canflush(p, s)) { /* Able to invalidate all tlbs with references */ - qunlock(s); - putseg(s); - return; - } - - if(waserror()) { - qunlock(s); - putseg(s); - return; - } - - /* Pass through the pte tables looking for memory pages to swap out */ - type = s->type&SG_TYPE; - size = s->mapsize; - for(i = 0; i < size; i++) { - l = s->map[i]; - if(l == nil) - continue; - for(pg = l->first; pg <= l->last; pg++) { - entry = *pg; - if(pagedout(entry)) - continue; - if(entry->modref & PG_REF) { - entry->modref &= ~PG_REF; - entry->refage = ageclock; - continue; - } - age = (short)(ageclock - entry->refage); - if(age < 16) - continue; - pagepte(type, pg); - } - } - poperror(); - qunlock(s); - putseg(s); -} - -static int -canflush(Proc *p, Segment *s) -{ - int i; - Proc *ep; - - if(incref(s) == 2) /* Easy if we are the only user */ - return canpage(p); - - /* Now we must do hardwork to ensure all processes which have tlb - * entries for this segment will be flushed if we succeed in paging it out - */ - p = proctab(0); - ep = &p[conf.nproc]; - while(p < ep) { - if(p->state != Dead) { - for(i = 0; i < NSEG; i++) - if(p->seg[i] == s) - if(!canpage(p)) - return 0; - } - p++; - } - return 1; -} - -static void -pagepte(int type, 
Page **pg) -{ - uintptr daddr; - Page *outp; - - outp = *pg; - switch(type) { - case SG_TEXT: /* Revert to demand load */ - putpage(outp); - *pg = nil; - break; - - case SG_DATA: - case SG_BSS: - case SG_STACK: - case SG_SHARED: - if(ioptr >= conf.nswppo) - break; - - /* - * get a new swap address with swapcount 2, one for the pte - * and one extra ref for us while we write the page to disk - */ - daddr = newswap(); - if(daddr == ~0) - break; - - /* clear any pages referring to it from the cache */ - cachedel(&swapimage, daddr); - - /* forget anything that it used to cache */ - uncachepage(outp); - - /* - * enter it into the cache so that a fault happening - * during the write will grab the page from the cache - * rather than one partially written to the disk - */ - outp->daddr = daddr; - cachepage(outp, &swapimage); - *pg = (Page*)(daddr|PG_ONSWAP); - - /* Add page to IO transaction list */ - iolist[ioptr++] = outp; - break; - } -} - -void -pagersummary(void) -{ - print("%lud/%lud memory %lud/%lud swap %d iolist\n", - palloc.user-palloc.freecount, - palloc.user, conf.nswap-swapalloc.free, conf.nswap, - ioptr); -} - -static void -executeio(void) -{ - Page *outp; - int i, n; - Chan *c; - char *kaddr; - KMap *k; - - c = swapimage.c; - for(i = 0; i < ioptr; i++) { - if(ioptr > conf.nswppo) - panic("executeio: ioptr %d > %d", ioptr, conf.nswppo); - outp = iolist[i]; - - assert(outp->ref > 0); - assert(outp->image == &swapimage); - assert(outp->daddr != ~0); - - /* only write when swap address still in use */ - if(swapcount(outp->daddr) > 1){ - k = kmap(outp); - kaddr = (char*)VA(k); - - if(waserror()) - panic("executeio: page outp I/O error"); - - n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr); - if(n != BY2PG) - nexterror(); - - kunmap(k); - poperror(); - } - - /* drop our extra swap reference */ - putswap((Page*)outp->daddr); - - /* Free up the page after I/O */ - putpage(outp); - } - ioptr = 0; -} - -int -needpages(void*) -{ - return palloc.freecount < 
swapalloc.headroom; -} - -void -setswapchan(Chan *c) -{ - uchar dirbuf[sizeof(Dir)+100]; - Dir d; - int n; - - if(waserror()){ - cclose(c); - nexterror(); - } - if(swapimage.c != nil) { - if(swapalloc.free != conf.nswap) - error(Einuse); - cclose(swapimage.c); - swapimage.c = nil; - } - - /* - * if this isn't a file, set the swap space - * to be at most the size of the partition - */ - if(devtab[c->type]->dc != L'M'){ - n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf); - if(n <= 0 || convM2D(dirbuf, n, &d, nil) == 0) - error("stat failed in setswapchan"); - if(d.length < conf.nswppo*BY2PG) - error("swap device too small"); - if(d.length < conf.nswap*BY2PG){ - conf.nswap = d.length/BY2PG; - swapalloc.top = &swapalloc.swmap[conf.nswap]; - swapalloc.free = conf.nswap; - } - } - c->flag &= ~CCACHE; - cclunk(c); - swapimage.c = c; - poperror(); -} diff --git a/sys/src/9/ppc/blast b/sys/src/9/ppc/blast index 8dd4b8028..297a0543b 100644 --- a/sys/src/9/ppc/blast +++ b/sys/src/9/ppc/blast @@ -1,6 +1,7 @@ dev root cons + swap env flash pipe diff --git a/sys/src/9/ppc/main.c b/sys/src/9/ppc/main.c index 93b81fd48..9beb2ed09 100644 --- a/sys/src/9/ppc/main.c +++ b/sys/src/9/ppc/main.c @@ -84,7 +84,6 @@ main(void) links(); chandevreset(); pageinit(); - swapinit(); sharedseginit(); fpsave(&initfp); initfp.fpscr = 0; diff --git a/sys/src/9/ppc/mkfile b/sys/src/9/ppc/mkfile index 0ced83b9c..e18b21b54 100644 --- a/sys/src/9/ppc/mkfile +++ b/sys/src/9/ppc/mkfile @@ -31,7 +31,6 @@ PORT=\ qlock.$O\ rdb.$O\ segment.$O\ - swap.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\ diff --git a/sys/src/9/sgi/indy b/sys/src/9/sgi/indy index 175e4e07c..ec5544029 100644 --- a/sys/src/9/sgi/indy +++ b/sys/src/9/sgi/indy @@ -1,6 +1,7 @@ dev root cons + swap uart mnt srv diff --git a/sys/src/9/sgi/main.c b/sys/src/9/sgi/main.c index 2ed77a5b6..f1c239e3a 100644 --- a/sys/src/9/sgi/main.c +++ b/sys/src/9/sgi/main.c @@ -192,8 +192,6 @@ main(void) initseg(); links(); chandevreset(); - - swapinit(); 
userinit(); schedinit(); panic("schedinit returned"); diff --git a/sys/src/9/sgi/mkfile b/sys/src/9/sgi/mkfile index 87e5ac7b3..1e39e89f8 100644 --- a/sys/src/9/sgi/mkfile +++ b/sys/src/9/sgi/mkfile @@ -38,7 +38,6 @@ PORT=\ rdb.$O\ rebootcmd.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/teg2/main.c b/sys/src/9/teg2/main.c index 30c46647d..8b1cddb9d 100644 --- a/sys/src/9/teg2/main.c +++ b/sys/src/9/teg2/main.c @@ -455,7 +455,6 @@ main(void) // i8250console(); /* too early; see init0 */ pageinit(); /* prints "1020M memory: ⋯ */ - swapinit(); userinit(); /* diff --git a/sys/src/9/teg2/mkfile b/sys/src/9/teg2/mkfile index 7c7eb8752..d964d125f 100644 --- a/sys/src/9/teg2/mkfile +++ b/sys/src/9/teg2/mkfile @@ -34,7 +34,6 @@ PORT=\ qio.$O\ qlock.$O\ segment.$O\ - swap.$O\ syscallfmt.$O\ sysfile.$O\ sysproc.$O\ diff --git a/sys/src/9/teg2/ts b/sys/src/9/teg2/ts index 236e5923e..67cd09d2d 100644 --- a/sys/src/9/teg2/ts +++ b/sys/src/9/teg2/ts @@ -2,6 +2,7 @@ dev root cons + swap env pipe proc diff --git a/sys/src/9/xen/main.c b/sys/src/9/xen/main.c index f58f4628e..f5e0adf07 100644 --- a/sys/src/9/xen/main.c +++ b/sys/src/9/xen/main.c @@ -103,8 +103,6 @@ main(void) // conf.monitor = 1; chandevreset(); pageinit(); - - swapinit(); userinit(); schedinit(); } diff --git a/sys/src/9/xen/mkfile b/sys/src/9/xen/mkfile index 5d9633439..3d83df4fb 100644 --- a/sys/src/9/xen/mkfile +++ b/sys/src/9/xen/mkfile @@ -32,7 +32,6 @@ PORT=\ qlock.$O\ rebootcmd.$O\ segment.$O\ - swap.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\ diff --git a/sys/src/9/xen/xenpcf b/sys/src/9/xen/xenpcf index 56cfd341f..479729163 100644 --- a/sys/src/9/xen/xenpcf +++ b/sys/src/9/xen/xenpcf @@ -1,6 +1,7 @@ dev root netif cons + swap uart arch env diff --git a/sys/src/9/zynq/main.c b/sys/src/9/zynq/main.c index b29c288bb..c281a5330 100644 --- a/sys/src/9/zynq/main.c +++ b/sys/src/9/zynq/main.c @@ -393,7 +393,6 @@ main(void) archinit(); chandevreset(); pageinit(); - swapinit(); 
screeninit(); userinit(); schedinit(); diff --git a/sys/src/9/zynq/mkfile b/sys/src/9/zynq/mkfile index 5ba3628e9..fca5f7f5c 100644 --- a/sys/src/9/zynq/mkfile +++ b/sys/src/9/zynq/mkfile @@ -31,7 +31,6 @@ PORT=\ qio.$O\ qlock.$O\ segment.$O\ - swap.$O\ sysfile.$O\ sysproc.$O\ taslock.$O\ diff --git a/sys/src/9/zynq/zynq b/sys/src/9/zynq/zynq index 1ff7bc788..f627ec1b4 100644 --- a/sys/src/9/zynq/zynq +++ b/sys/src/9/zynq/zynq @@ -1,6 +1,7 @@ dev root cons + swap arch uart mnt -- cgit v1.2.3