diff options
author | mischief <mischief@offblast.org> | 2014-06-24 18:02:25 -0700 |
---|---|---|
committer | mischief <mischief@offblast.org> | 2014-06-24 18:02:25 -0700 |
commit | 5ba95fdb07ddc2c32111a1b2f57f17aa27fcbbf5 (patch) | |
tree | c1ec54cb9ecff85b0b820a26d26a10a32a118d0c /sys/src/9/xen/main.c | |
parent | fa03455b5057675b18d1c87aef2d1071b2088de0 (diff) |
import xen 32 bit paravirtual kernel from /n/sources/xen.
Diffstat (limited to 'sys/src/9/xen/main.c')
-rw-r--r-- | sys/src/9/xen/main.c | 801 |
1 files changed, 801 insertions, 0 deletions
diff --git a/sys/src/9/xen/main.c b/sys/src/9/xen/main.c new file mode 100644 index 000000000..966b39997 --- /dev/null +++ b/sys/src/9/xen/main.c @@ -0,0 +1,801 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "init.h" +#include "pool.h" +#include "reboot.h" +#include <tos.h> + +Mach *m; + +#define BOOTARGS (xenstart->cmd_line) +#define BOOTARGSLEN (sizeof xenstart->cmd_line) +#define MAXCONF 64 + +enum { + /* space for syscall args, return PC, top-of-stack struct */ + Ustkheadroom = sizeof(Sargs) + sizeof(uintptr) + sizeof(Tos), +}; + +char bootdisk[KNAMELEN]; +Conf conf; +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; +uchar *sp; /* user stack of init proc */ +int idle_spin; + +static void +options(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. + */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getfields(cp, line, MAXCONF, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +void +main(void) +{ + mach0init(); + options(); + quotefmtinstall(); + xenconsinit(); + + //consdebug = rdb; + print("\nPlan 9 (%s)\n", xenstart->magic); + + cpuidentify(); + // meminit() is not for us + confinit(); + archinit(); + xinit(); + trapinit(); + printinit(); + cpuidprint(); + mmuinit(); + if(arch->intrinit) /* launches other processors on an mp */ + arch->intrinit(); + timersinit(); + mathinit(); + kbdenable(); + xengrantinit(); + if(arch->clockenable) + arch->clockenable(); + procinit0(); + initseg(); + + links(); +// conf.monitor = 1; + chandevreset(); + pageinit(); + + swapinit(); + userinit(); + active.thunderbirdsarego = 1; + schedinit(); +} + +void +mach0init(void) +{ + m = (Mach*)MACHADDR; + m->machno = 0; + conf.nmach = 1; + MACHP(0) = (Mach*)CPU0MACH; + m->pdb = (ulong*)xenstart->pt_base; +#ifdef NOT + m->gdt = (Segdesc*)CPU0GDT; +#endif + + machinit(); + + active.machs = 1; + active.exiting = 0; +} + +void +machinit(void) +{ + int machno; + ulong *pdb; + Segdesc *gdt; + + machno = m->machno; + pdb = m->pdb; + gdt = m->gdt; + memset(m, 0, sizeof(Mach)); + m->machno = machno; + m->pdb = pdb; + m->gdt = gdt; + m->perf.period = 1; + + /* + * For polled uart output at boot, need + * a default delay constant. 100000 should + * be enough for a while. Cpuidentify will + * calculate the real value later. + */ + m->loopconst = 100000; + m->cpumhz = 1000; // XXX! + + HYPERVISOR_shared_info = (shared_info_t*)mmumapframe(XENSHARED, (xenstart->shared_info)>>PGSHIFT); + + // XXX m->shared = &HYPERVISOR_shared_info->vcpu_data[m->machno]; +} + +void +init0(void) +{ + int i; + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. + */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + chandevinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", arch->id, conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "386", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + ksetenv("readparts", "1", 0); + for(i = 0; i < nconf; i++){ + if(confname[i][0] != '*') + ksetenv(confname[i], confval[i], 0); + ksetenv(confname[i], confval[i], 1); + } + poperror(); + } + + kproc("alarm", alarmkproc, 0); + touser(sp); +} + +void +userinit(void) +{ + Proc *p; + Segment *s; + KMap *k; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + p->fpstate = FPinit; + fpoff(); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * 4 bytes for gotolabel's return PC + */ + p->sched.pc = (ulong)init0; + p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD); + + /* + * User Stack + */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG); + p->seg[SSEG] = s; + pg = newpage(1, 0, USTKTOP-BY2PG); + segpage(s, pg); + k = kmap(pg); + bootargs(VA(k)); + kunmap(k); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, 1); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(1, 0, UTZERO); + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + segpage(s, pg); + k = kmap(s->map[0]->pages[0]); + memmove((ulong*)VA(k), initcode, sizeof initcode); + kunmap(k); + ready(p); +} + +uchar * +pusharg(char *p) +{ + int n; + + n = strlen(p)+1; + sp -= n; + memmove(sp, p, n); + return sp; +} + +void +bootargs(ulong base) +{ + int i, ac; + uchar *av[32]; + uchar **lsp; + + sp = (uchar*)base + BY2PG - Ustkheadroom; + + ac = 0; + av[ac++] = pusharg("/386/9dos"); + av[ac++] = pusharg("-D"); + + /* 4 byte word align stack */ + sp = (uchar*)((ulong)sp & ~3); + + /* build argc, argv on stack */ + sp -= (ac+1)*sizeof(sp); + lsp = (uchar**)sp; + for(i = 0; i < ac; i++) + *lsp++ = av[i] + ((USTKTOP - BY2PG) - base); + *lsp = 0; + sp += (USTKTOP - BY2PG) - base - sizeof(ulong); +} + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < nconf; i++) + if(cistrcmp(confname[i], name) == 0) + return confval[i]; + return 0; +} + +static void +writeconf(void) +{ + char *p, *q; + int n; + + p = getconfenv(); + + if(waserror()) { + free(p); + nexterror(); + } + + /* convert to name=value\n format */ + for(q=p; *q; q++) { + q += strlen(q); + *q = '='; + q += strlen(q); + *q = '\n'; + } + n = q - p + 1; + if(n >= BOOTARGSLEN) + error("kernel configuration too large"); + memmove(BOOTARGS, p, n); + poperror(); + free(p); +} + +void +confinit(void) +{ + char *p; + int i, userpcnt; + ulong kpages; + + for(i = 0; i < nconf; i++) + print("%s=%s\n", confname[i], confval[i]); + /* + * all ram above xentop is free, but must be mappable + * to virt addrs less than VIRT_START. + */ + kpages = PADDR(hypervisor_virt_start)>>PGSHIFT; + if(xenstart->nr_pages <= kpages) + kpages = xenstart->nr_pages; + else + print("Warning: Plan 9 / Xen limitation - " + "using only %lud of %lud available RAM pages\n", + kpages, xenstart->nr_pages); + xentop = PGROUND(PADDR(xentop)); + conf.mem[0].npage = kpages - (xentop>>PGSHIFT); + conf.mem[0].base = xentop; + + if(p = getconf("*kernelpercent")) + userpcnt = 100 - strtol(p, 0, 0); + else + userpcnt = 0; + + conf.npage = 0; + for(i=0; i<nelem(conf.mem); i++) + conf.npage += conf.mem[i].npage; + + conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5; + if(cpuserver) + conf.nproc *= 3; + if(conf.nproc > 2000) + conf.nproc = 2000; + conf.nimage = 200; + conf.nswap = conf.nproc*80; + conf.nswppo = 4096; + + if(cpuserver) { + if(userpcnt < 10) + userpcnt = 70; + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Hack for the big boys. Only good while physmem < 4GB. + * Give the kernel fixed max + enough to allocate the + * page pool. + * This is an overestimate as conf.upages < conf.npages. + * The patch of nimage is a band-aid, scanning the whole + * page list in imagereclaim just takes too long. + */ + if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){ + kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG; + conf.nimage = 2000; + kpages += (conf.nproc*KSTACK)/BY2PG; + } + } else { + if(userpcnt < 10) { + if(conf.npage*BY2PG < 16*MB) + userpcnt = 40; + else + userpcnt = 60; + } + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Make sure terminals with low memory get at least + * 4MB on the first Image chunk allocation. + */ + if(conf.npage*BY2PG < 16*MB) + imagmem->minarena = 4*1024*1024; + } + + /* + * can't go past the end of virtual memory + * (ulong)-KZERO is 2^32 - KZERO + */ + if(kpages > ((ulong)-KZERO)/BY2PG) + kpages = ((ulong)-KZERO)/BY2PG; + + conf.upages = conf.npage - kpages; + conf.ialloc = (kpages/2)*BY2PG; + + /* + * Guess how much is taken by the large permanent + * datastructures. Mntcache and Mntrpc are not accounted for + * (probably ~300KB). + */ + kpages *= BY2PG; + kpages -= conf.upages*sizeof(Page) + + conf.nproc*sizeof(Proc) + + conf.nimage*sizeof(Image) + + conf.nswap + + conf.nswppo*sizeof(Page); + mainmem->maxsize = kpages; + if(!cpuserver){ + /* + * give terminals lots of image memory, too; the dynamic + * allocation will balance the load properly, hopefully. + * be careful with 32-bit overflow. + */ + imagmem->maxsize = kpages; + } +} + +static char* mathmsg[] = +{ + nil, /* handled below */ + "denormalized operand", + "division by zero", + "numeric overflow", + "numeric underflow", + "precision loss", +}; + +static void +mathnote(void) +{ + int i; + ulong status; + char *msg, note[ERRMAX]; + + status = up->fpsave.status; + + /* + * Some attention should probably be paid here to the + * exception masks and error summary. + */ + msg = "unknown exception"; + for(i = 1; i <= 5; i++){ + if(!((1<<i) & status)) + continue; + msg = mathmsg[i]; + break; + } + if(status & 0x01){ + if(status & 0x40){ + if(status & 0x200) + msg = "stack overflow"; + else + msg = "stack underflow"; + }else + msg = "invalid operation"; + } + snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux", + msg, up->fpsave.pc, status); + postnote(up, 1, note, NDebug); +} + +/* + * math coprocessor error + */ +static void +matherror(Ureg *ur, void*) +{ + /* + * a write cycle to port 0xF0 clears the interrupt latch attached + * to the error# line from the 387 + */ + if(!(m->cpuiddx & 0x01)) + outb(0xF0, 0xFF); + + /* + * save floating point state to check out error + */ + fpenv(&up->fpsave); + mathnote(); + + if(ur->pc & KZERO) + panic("fp: status %ux fppc=0x%lux pc=0x%lux", + up->fpsave.status, up->fpsave.pc, ur->pc); +} + +/* + * math coprocessor emulation fault + */ +static void +mathemu(Ureg *ureg, void*) +{ + if(up->fpstate & FPillegal){ + /* someone did floating point in a note handler */ + postnote(up, 1, "sys: floating point in note handler", NDebug); + return; + } + switch(up->fpstate){ + case FPinit: + fpinit(); + up->fpstate = FPactive; + break; + case FPinactive: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + * More attention should probably be paid here to the + * exception masks and error summary. + */ + if((up->fpsave.status & ~up->fpsave.control) & 0x07F){ + mathnote(); + break; + } + fprestore(&up->fpsave); + up->fpstate = FPactive; + break; + case FPactive: + panic("math emu pid %ld %s pc 0x%lux", + up->pid, up->text, ureg->pc); + break; + } +} + +/* + * math coprocessor segment overrun + */ +static void +mathover(Ureg*, void*) +{ + pexit("math overrun", 0); +} + +void +mathinit(void) +{ + trapenable(VectorCERR, matherror, 0, "matherror"); + //if(X86FAMILY(m->cpuidax) == 3) + // intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror"); + trapenable(VectorCNA, mathemu, 0, "mathemu"); + trapenable(VectorCSO, mathover, 0, "mathover"); +} + +/* + * set up floating point for a new process + */ +void +procsetup(Proc*p) +{ + p->fpstate = FPinit; + fpoff(); +} + +void +procfork(Proc *p) +{ + int s; + + p->kentry = up->kentry; + p->pcycles = -p->kentry; + + /* inherit user descriptors */ + memmove(p->gdt, up->gdt, sizeof(p->gdt)); + + /* copy local descriptor table */ + if(up->ldt != nil && up->nldt > 0){ + p->ldt = smalloc(sizeof(Segdesc) * up->nldt); + memmove(p->ldt, up->ldt, sizeof(Segdesc) * up->nldt); + p->nldt = up->nldt; + } + + /* save floating point state */ + s = splhi(); + switch(up->fpstate & ~FPillegal){ + case FPactive: + fpsave(&up->fpsave); + up->fpstate = FPinactive; + case FPinactive: + p->fpsave = up->fpsave; + p->fpstate = FPinactive; + } + splx(s); +} + +void +procrestore(Proc *p) +{ + uvlong t; + + if(p->kp) + return; + cycles(&t); + p->pcycles -= t; +} + +/* + * Save the mach dependent part of the process state. + */ +void +procsave(Proc *p) +{ + uvlong t; + + cycles(&t); + p->pcycles += t; + if(p->fpstate == FPactive){ + if(p->state == Moribund) + fpclear(); + else{ + /* + * Fpsave() stores without handling pending + * unmasked exeptions. Postnote() can't be called + * here as sleep() already has up->rlock, so + * the handling of pending exceptions is delayed + * until the process runs again and generates an + * emulation fault to activate the FPU. + */ + fpsave(&p->fpsave); + } + p->fpstate = FPinactive; + } + + /* + * While this processor is in the scheduler, the process could run + * on another processor and exit, returning the page tables to + * the free list where they could be reallocated and overwritten. + * When this processor eventually has to get an entry from the + * trashed page tables it will crash. + * + * If there's only one processor, this can't happen. + * You might think it would be a win not to do this in that case, + * especially on VMware, but it turns out not to matter. + */ + mmuflushtlb(0); +} + +static void +shutdown(int ispanic) +{ + int ms, once; + + lock(&active); + if(ispanic) + active.ispanic = ispanic; + else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0) + active.ispanic = 0; + once = active.machs & (1<<m->machno); + active.machs &= ~(1<<m->machno); + active.exiting = 1; + unlock(&active); + + if(once) + print("cpu%d: exiting\n", m->machno); + //spllo(); + for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){ + delay(TK2MS(2)); + if(active.machs == 0 && consactive() == 0) + break; + } + + if(getconf("*debug")) + delay(5*60*1000); + + if(active.ispanic){ + if(!cpuserver) + for(;;) + halt(); + delay(10000); + }else + delay(1000); +} + +void +reboot(void *entry, void *code, ulong size) +{ + void (*f)(ulong, ulong, ulong); + //ulong *pdb; + + writeconf(); + + shutdown(0); + + /* + * should be the only processor running now + */ + + print("shutting down...\n"); + delay(200); + + splhi(); + + /* turn off buffered serial console */ + serialoq = nil; + + /* shutdown devices */ + chandevshutdown(); + + /* reboot(0, ...) on Xen causes domU shutdown */ + if(entry == 0) + HYPERVISOR_shutdown(0); + + /* + * Modify the machine page table to directly map the low 4MB of memory + * This allows the reboot code to turn off the page mapping + */ + //pdb = m->pdb; + //pdb[PDX(0)] = pdb[PDX(KZERO)]; + mmuflushtlb(0); + + /* setup reboot trampoline function */ + f = (void*)REBOOTADDR; + memmove(f, rebootcode, sizeof(rebootcode)); + + print("rebooting...\n"); + + /* off we go - never to return */ + (*f)(PADDR(entry), PADDR(code), size); +} + + +void +exit(int ispanic) +{ + shutdown(ispanic); + arch->reset(); +} + +int +cistrcmp(char *a, char *b) +{ + int ac, bc; + + for(;;){ + ac = *a++; + bc = *b++; + + if(ac >= 'A' && ac <= 'Z') + ac = 'a' + (ac - 'A'); + if(bc >= 'A' && bc <= 'Z') + bc = 'a' + (bc - 'A'); + ac -= bc; + if(ac) + return ac; + if(bc == 0) + break; + } + return 0; +} + +int +cistrncmp(char *a, char *b, int n) +{ + unsigned ac, bc; + + while(n > 0){ + ac = *a++; + bc = *b++; + n--; + + if(ac >= 'A' && ac <= 'Z') + ac = 'a' + (ac - 'A'); + if(bc >= 'A' && bc <= 'Z') + bc = 'a' + (bc - 'A'); + + ac -= bc; + if(ac) + return ac; + if(bc == 0) + break; + } + + return 0; +} + |