diff options
author | mischief <mischief@offblast.org> | 2014-06-24 18:02:25 -0700 |
---|---|---|
committer | mischief <mischief@offblast.org> | 2014-06-24 18:02:25 -0700 |
commit | 5ba95fdb07ddc2c32111a1b2f57f17aa27fcbbf5 (patch) | |
tree | c1ec54cb9ecff85b0b820a26d26a10a32a118d0c | |
parent | fa03455b5057675b18d1c87aef2d1071b2088de0 (diff) |
import xen 32 bit paravirtual kernel from /n/sources/xen.
85 files changed, 20627 insertions, 0 deletions
diff --git a/sys/src/9/xen/archxen.c b/sys/src/9/xen/archxen.c new file mode 100644 index 000000000..721950223 --- /dev/null +++ b/sys/src/9/xen/archxen.c @@ -0,0 +1,115 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +static int +identify(void) +{ + m->havepge = 0; + return 0; +} + +static void +intrinit(void) +{ + ulong machs; + int i, ncpu; + char *cp; + char node[32]; + char buf[32]; + + if((cp = getconf("*nomp")) != nil && strtol(cp, 0, 0) != 0) + return; + ncpu = MAX_VIRT_CPUS; + if (cp = getconf("*ncpu")) { + ncpu = strtol(cp, 0, 0); + if (ncpu < 1) + ncpu = 1; + } + machs = 1; + for (i = 1; i < ncpu; i++) { + sprint(node, "cpu/%d/availability", i); + if (xenstore_read(node, buf, sizeof buf) <= 0) + break; + print("%s: %s\n", node, buf); + if (strcmp(buf, "online") == 0) { + machs |= 1<<i; + conf.nmach++; + } + } + if (conf.nmach > 1) { + print("Sorry, SMP not supported yet: 1 of %lud CPUs startd\n", conf.nmach); + conf.nmach = 1; + } +} + +static void +shutdown(void) +{ + HYPERVISOR_shutdown(1); +} + +int xenintrenable(Vctl *v); +int xenintrvecno(int irq); +int xenintrdisable(int irq); +void xentimerenable(void); +uvlong xentimerread(uvlong*); +void xentimerset(uvlong); + +PCArch archxen = { +.id= "Xen", +.ident= identify, +.reset= shutdown, +.intrinit= intrinit, +.intrenable= xenintrenable, +.intrvecno= xenintrvecno, +.intrdisable= xenintrdisable, +.clockenable= xentimerenable, +.fastclock= xentimerread, +.timerset= xentimerset, +}; + +/* + * Placeholders to satisfy external references in generic devarch.c + */ +ulong getcr4(void) { return 0; } +void putcr4(ulong) {} +int inb(int) { return 0; } +ushort ins(int) { return 0; } +ulong inl(int) { return 0; } +void outb(int, int) {} +void outs(int, ushort) {} +void outl(int, ulong) {} +void i8042reset(void) {} +void i8253enable(void) {} +void i8253init(void) {} +void i8253link(void) {} +uvlong i8253read(uvlong*) { return 0; } +void i8253timerset(uvlong) {} +int i8259disable(int) { return 0; } +int i8259enable(Vctl*) { return 0; } +void i8259init(void) {} +int i8259isr(int) { return 0; } +void i8259on(void) {} +void i8259off(void) {} +int i8259vecno(int) { return 0; } +int mtrrprint(char*, long) { return 0; } +int mtrr(uvlong, uvlong, char *) { return 0; } + +/* + * XXX until fpsave is debugged + */ +void +fpssesave(FPsave* f) +{ + fpx87save(f); +} + +void +fpsserestore(FPsave* f) +{ + fpx87restore(f); +} diff --git a/sys/src/9/xen/cppx b/sys/src/9/xen/cppx new file mode 100755 index 000000000..77370d014 --- /dev/null +++ b/sys/src/9/xen/cppx @@ -0,0 +1,41 @@ +#!/bin/rc +awk ' +function qq(s) { + gsub("\"", "£", s) + return "\"@" s "\"" +} + +/^#include/ { next +} +/^#define.*\\$/ { + save[n++] = $0 + print qq($0 "\\") + next +} +(n > 0) && /\\$/ { + save[n++] = $0 + print qq($0 "\\") + next +} +(n > 0) { + save[n++] = $0 + print qq($0) + for (i = 0; i < n; i++) print save[i] + n = 0 + next +} +/^# *((define)|(error)).*[^\\]*$/ { + print qq($0) + print + next +} +/^# *undef.*[^\\]*$/ { + print + print qq($0) + next +} + { print +} +' $* | +cpp -P | +sed -e 's/£/"/g' -e 's/^"@(.*\\)\\"$/\1/' -e 's/^"@(.*)"$/\1/' diff --git a/sys/src/9/xen/dat.h b/sys/src/9/xen/dat.h new file mode 100644 index 000000000..4114fabab --- /dev/null +++ b/sys/src/9/xen/dat.h @@ -0,0 +1,53 @@ +#include "../pc/dat.h" + +typedef unsigned char uint8_t; +typedef unsigned char uint8; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef unsigned long long uint64_t; +typedef char int8_t; +typedef short int16_t; +typedef long int32_t; +typedef long long int64_t; + +#define __attribute__(x) +enum { + EINVAL, + EACCES, + EEXIST, + EISDIR, + ENOENT, + ENOMEM, + ENOSPC, + EIO, + ENOTEMPTY, + ENOSYS, + EROFS, + EBUSY, + EAGAIN, + EISCONN, +}; + +#include "xendat.h" + +#undef mk_unsigned_long +#define mk_unsigned_long(x) ((unsigned long)(x)) + +#ifndef set_xen_guest_handle +#define set_xen_guest_handle(hnd, val) hnd = val +#endif + +extern ulong hypervisor_virt_start; +extern ulong *patomfn, *matopfn; +extern start_info_t *xenstart; +extern ulong xentop; +extern shared_info_t *HYPERVISOR_shared_info; + +/* + * Fake kmap + * XXX is this still viable? + */ +#undef VA +#define VA(k) ((ulong)(k)) +#define kmap(p) (KMap*)((p)->pa|KZERO) +#define kunmap(k) diff --git a/sys/src/9/xen/devrtc.c b/sys/src/9/xen/devrtc.c new file mode 100644 index 000000000..52db3ca22 --- /dev/null +++ b/sys/src/9/xen/devrtc.c @@ -0,0 +1,96 @@ +/* + * Xen wall clock + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum{ + Qdir = 0, + Qrtc, +}; + +Dirtab rtcdir[]={ + ".", {Qdir, 0, QTDIR}, 0, 0555, + "rtc", {Qrtc, 0}, 0, 0664, +}; + +static long +rtcread(Chan *c, void *a, long n, vlong offset) +{ + if(c->qid.type & QTDIR) + return devdirread(c, a, n, rtcdir, nelem(rtcdir), devgen); + + switch((ulong)c->qid.path){ + case Qrtc: + return readnum((ulong)offset, a, n, xenwallclock(), 12); + } + error(Ebadarg); + return 0; +} + +static long +rtcwrite(Chan*c, void*, long n, vlong) +{ + switch((ulong)c->qid.path){ + case Qrtc: + return n; + } + error(Eperm); + return 0; +} + +static Chan* +rtcattach(char* spec) +{ + return devattach('r', spec); +} + +static Walkqid* +rtcwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen); +} + +static int +rtcstat(Chan* c, uchar* dp, int n) +{ + return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen); +} + +static Chan* +rtcopen(Chan* c, int omode) +{ + return devopen(c, omode, rtcdir, nelem(rtcdir), devgen); +} + +static void +rtcclose(Chan*) +{ +} + +Dev rtcdevtab = { + 'r', + "rtc", + + devreset, + devinit, + devshutdown, + rtcattach, + rtcwalk, + rtcstat, + rtcopen, + devcreate, + rtcclose, + rtcread, + devbread, + rtcwrite, + devbwrite, + devremove, + devwstat, +}; + diff --git a/sys/src/9/xen/devxenstore.c b/sys/src/9/xen/devxenstore.c new file mode 100644 index 000000000..2004aecc1 --- /dev/null +++ b/sys/src/9/xen/devxenstore.c @@ -0,0 +1,590 @@ +/* + * Driver for xenstore - database shared between domains, used by xenbus to + * communicate configuration info. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../pc/io.h" + +#define LOG(a) + +typedef struct Aux Aux; + +enum { + Qtopdir, + Qctl, + Qwatch, + WRITING = 0, + READING, + WATCHING, + MAXIO = 8*1024, +}; + +Dirtab xsdir[] = { + ".", {Qtopdir, 0, QTDIR}, 0, 0555, + "xenstore", {Qctl, 0}, 0, 0660, + "xenwatch", {Qwatch, 0}, 0, 0440, +}; + +struct { + struct xenstore_domain_interface *intf; + struct xsd_sockmsg hdr; + int hdrvalid; + int evtchn; + int nextreqid; + Aux *rhead; + Aux *kernelaux; + Queue *evq; + Rendez wr; + Rendez rr; + QLock; + Lock rlock; +} xenstore; + +struct Aux { + QLock; + Rendez qr; + Queue *ioq; + Aux *next; + int state; + int reqid; +}; + +static char Ephase[] = "phase error"; +static char Eproto[] = "protocol error"; +static char NodeShutdown[] = "control/shutdown"; + +static void xenbusproc(void*); + +static int +notfull(void*) +{ + struct xenstore_domain_interface *xs = xenstore.intf; + + return (xs->req_prod-xs->req_cons) < XENSTORE_RING_SIZE; +} + +static int +notempty(void*) +{ + struct xenstore_domain_interface *xs = xenstore.intf; + + return xs->rsp_prod > xs->rsp_cons; +} + +static int +ishead(void* a) +{ + return xenstore.rhead == a; +} + +static void +xsintr(Ureg*, void*) +{ + LOG(dprint("xsintr\n");) + wakeup(&xenstore.rr); + wakeup(&xenstore.wr); +} + +static void +xwrite(Queue *q, char *buf, int len) +{ + struct xenstore_domain_interface *xs; + int m, n; + XENSTORE_RING_IDX idx; + + xs = xenstore.intf; + while (len > 0) { + n = XENSTORE_RING_SIZE - (xs->req_prod - xs->req_cons); + if (n == 0) { + xenchannotify(xenstore.evtchn); + sleep(&xenstore.wr, notfull, 0); + continue; + } + if (n > len) + n = len; + idx = MASK_XENSTORE_IDX(xs->req_prod); + m = XENSTORE_RING_SIZE - idx; + if (m > n) + m = n; + if (q) + qread(q, xs->req+idx, m); + else + memmove(xs->req+idx, buf, m); + if (m < n) { + if (q) + qread(q, xs->req, n-m); + else + memmove(xs->req, buf+m, n-m); + } + coherence(); + xs->req_prod += n; + xenchannotify(xenstore.evtchn); + if (buf) + buf += n; + len -= n; + } +} + +static void +xread(Queue *q, char *buf, int len) +{ + struct xenstore_domain_interface *xs = xenstore.intf; + int n, m; + XENSTORE_RING_IDX idx; + + for (n = len; n > 0; n -= m) { + while (xs->rsp_prod == xs->rsp_cons) { + xenchannotify(xenstore.evtchn); + if (up == 0) + HYPERVISOR_yield(); + else + sleep(&xenstore.rr, notempty, 0); + } + idx = MASK_XENSTORE_IDX(xs->rsp_cons); + m = xs->rsp_prod - xs->rsp_cons; + if (m > n) + m = n; + if (m > XENSTORE_RING_SIZE - idx) + m = XENSTORE_RING_SIZE - idx; + if (q) + qwrite(q, xs->rsp+idx, m); + else if (buf) { + memmove(buf, xs->rsp+idx, m); + buf += m; + } + coherence(); + xs->rsp_cons += m; + } + xenchannotify(xenstore.evtchn); +} + +static void +xsrpc(Aux *aux) +{ + Queue *q; + Aux *l, *r, **lp; + struct xsd_sockmsg hdr; + long n; + + q = aux->ioq; + + if (aux->state == WATCHING) + aux->reqid = 0; + else { + /* get the request header and check validity */ + if (qlen(q) < sizeof hdr) + error(Eproto); + qread(q, &hdr, sizeof hdr); + n = hdr.len; + if (qlen(q) != n) + error(Eproto); + qlock(&xenstore); + /* generate a unique request id */ + aux->reqid = ++xenstore.nextreqid; + hdr.req_id = aux->reqid; + hdr.tx_id = 0; + /* send the request */ + xwrite(0, (char*)&hdr, sizeof hdr); + xwrite(q, 0, n); + qunlock(&xenstore); + } + + /* join list of requests awaiting response */ + ilock(&xenstore.rlock); + if (xenstore.rhead == 0) { + aux->next = 0; + xenstore.rhead = aux; + } else { + aux->next = xenstore.rhead->next; + xenstore.rhead->next = aux; + } + iunlock(&xenstore.rlock); + + /* loop until matching response header has been received */ + if (waserror()) { + ilock(&xenstore.rlock); + for (lp = &xenstore.rhead; *lp && *lp != aux; lp = &(*lp)->next) + ; + if (*lp != 0) { + *lp = (*lp)->next; + if (lp == &xenstore.rhead && *lp) + wakeup(&(*lp)->qr); + } + iunlock(&xenstore.rlock); + nexterror(); + } + for (;;) { + /* wait until this request reaches head of queue */ + if (xenstore.rhead != aux) + sleep(&aux->qr, ishead, aux); + /* wait until a response header (maybe for another request) has been read */ + if (!xenstore.hdrvalid) { + xread(0, (char*)&xenstore.hdr, sizeof xenstore.hdr); + xenstore.hdrvalid = 1; + } + if (xenstore.hdr.req_id == aux->reqid) + break; + /* response was for a different request: move matching request to head of queue */ + ilock(&xenstore.rlock); + for (l = xenstore.rhead; r = l->next; l = r) + if (xenstore.hdr.req_id == r->reqid) { + l->next = r->next; + r->next = xenstore.rhead; + xenstore.rhead = r; + break; + } + iunlock(&xenstore.rlock); + if (r) { + /* wake the matching request */ + wakeup(&r->qr); + } else { + /* response without a request: should be a watch event */ + xenstore.hdrvalid = 0; + xread(0, 0, xenstore.hdr.len); + continue; + } + } + + /* queue the response header, and data if any, for the caller to read */ + qwrite(q, &xenstore.hdr, sizeof xenstore.hdr); + xenstore.hdrvalid = 0; + /* read the data, if any */ + if (xenstore.hdr.len > 0) + xread(q, 0, xenstore.hdr.len); + + /* remove finished request and wake the next request on the queue */ + ilock(&xenstore.rlock); + xenstore.rhead = aux->next; + iunlock(&xenstore.rlock); + poperror(); + if (xenstore.rhead != 0) + wakeup(&xenstore.rhead->qr); +} + +static void +xsreset() +{ + LOG(dprint("xsreset\n");) +} + +static void +xsinit() +{ + intrenable(xenstore.evtchn, xsintr, 0, BUSUNKNOWN, "Xen store"); + kproc("xenbus", xenbusproc, 0); +} + +static Chan* +xsattach(char *spec) +{ + return devattach('x', spec); +} + +static Walkqid* +xswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, xsdir, nelem(xsdir), devgen); +} + +static int +xsstat(Chan *c, uchar *dp, int n) +{ + return devstat(c, dp, n, xsdir, nelem(xsdir), devgen); +} + +static Aux* +auxalloc(int initstate) +{ + Aux *aux; + Queue *q; + + aux = mallocz(sizeof(Aux), 1); + if (aux == 0) + return 0; + q = qopen(MAXIO, 0, 0, 0); + if (q == 0) { + free(aux); + return 0; + } + qnoblock(q, 1); + aux->state = initstate; + aux->ioq = q; + return aux; +} + +static Chan* +xsopen(Chan *c, int omode) +{ + Aux *aux; + int state; + + c = devopen(c, omode, xsdir, nelem(xsdir), devgen); + state = WRITING; + switch ((ulong)c->qid.path) { + case Qwatch: + state = WATCHING; + /* fall through */ + case Qctl: + aux = auxalloc(state); + if (aux == 0) { + c->flag &= ~COPEN; + error(Enomem); + } + c->aux = aux; + break; + } + return c; +} + +static void +xsclose(Chan* c) +{ + Aux *aux; + + if ((c->flag&COPEN) == 0) + return; + + switch ((ulong)c->qid.path) { + case Qwatch: + case Qctl: + if ((aux = (Aux*)c->aux) != 0) { + qfree(aux->ioq); + free(aux); + c->aux = 0; + } + break; + } +} + +static long +xsread(Chan *c, void *a, long n, vlong off) +{ + Aux *aux; + Queue *q; + long nr; + + USED(off); + if (c->qid.type == QTDIR) + return devdirread(c, a, n, xsdir, nelem(xsdir), devgen); + + aux = (Aux*)c->aux; + qlock(aux); + if (waserror()) { + qunlock(aux); + nexterror(); + } + q = aux->ioq; + switch (aux->state) { + case WRITING: + if (qlen(q) == 0) + error(Ephase); + xsrpc(aux); + aux->state = READING; + break; + case WATCHING: + if (qlen(q) == 0) + xsrpc(aux); + break; + } + if (!qcanread(q)) + nr = 0; + else + nr = qread(q, a, n); + qunlock(aux); + poperror(); + return nr; +} + +static long +xswrite(Chan *c, void *a, long n, vlong off) +{ + Aux *aux; + Queue *q; + long nr; + + if (c->qid.type == QTDIR) + error(Eperm); + if ((ulong)c->qid.path == Qwatch) + error(Ebadusefd); + + aux = (Aux*)c->aux; + qlock(aux); + if (waserror()) { + qunlock(aux); + nexterror(); + } + q = aux->ioq; + if ((off == 0 || aux->state == READING) && qlen(q) > 0) + qflush(q); + aux->state = WRITING; + nr = qwrite(aux->ioq, a, n); + qunlock(aux); + poperror(); + return nr; +} + +Dev xenstoredevtab = { + 'x', + "xenstore", + + xsreset, + xsinit, + devshutdown, + xsattach, + xswalk, + xsstat, + xsopen, + devcreate, + xsclose, + xsread, + devbread, + xswrite, + devbwrite, + devremove, + devwstat, +}; + +static char* +xscmd(Aux *aux, char *buf, int cmd, char *s, char *val) +{ + struct xsd_sockmsg *msg; + char *arg; + long n; + + msg = (struct xsd_sockmsg*)buf; + arg = buf + sizeof(*msg); + msg->type = cmd; + msg->len = strlen(s)+1; + if (val) { + msg->len += strlen(val); + if (cmd == XS_WATCH) + msg->len++; /* stupid special case */ + } + strcpy(arg, s); + if (val) + strcpy(arg+strlen(s)+1, val); + n = sizeof(*msg)+msg->len; + if (up == 0) { + msg->req_id = 1; + msg->tx_id = 0; + xwrite(0, buf, n); + xread(0, buf, sizeof(*msg)); + xread(0, arg, msg->len); + } else { + qlock(aux); + if (qlen(aux->ioq) > 0) + qflush(aux->ioq); + qwrite(aux->ioq, buf, n); + xsrpc(aux); + qread(aux->ioq, buf, sizeof(*msg)); + LOG(dprint("xs: type %d req_id %d len %d\n", msg->type, msg->req_id, msg->len);) + // XXX buffer overflow + qread(aux->ioq, arg, msg->len); + qunlock(aux); + } + arg[msg->len] = 0; + if (msg->type == XS_ERROR) { + return 0; + } + return arg; +} + +static void +intfinit(void) +{ + if (xenstore.intf == 0) { + xenstore.intf = (struct xenstore_domain_interface*)mmumapframe(XENBUS, xenstart->store_mfn); + xenstore.evtchn = xenstart->store_evtchn; + xenstore.kernelaux = auxalloc(WRITING); + } +} + +void +xenstore_write(char *s, char *val) +{ + char buf[512]; + + intfinit(); + xscmd(xenstore.kernelaux, buf, XS_WRITE, s, val); +} + +int +xenstore_read(char *s, char *val, int len) +{ + char buf[512]; + char *p; + + intfinit(); + p = xscmd(xenstore.kernelaux, buf, XS_READ, s, nil); + if (p == 0) + return -1; + strecpy(val, val+len, p); + return 1; +} + +void +xenstore_setd(char *dir, char *node, int value) +{ + int off; + char buf[12]; + + off = strlen(dir); + sprint(dir+off, "%s", node); + sprint(buf, "%ud", value); + xenstore_write(dir, buf); + dir[off] = 0; +} + +int +xenstore_gets(char *dir, char *node, char *buf, int buflen) +{ + int off; + int n; + + off = strlen(dir); + sprint(dir+off, "%s", node); + n = xenstore_read(dir, buf, buflen); + dir[off] = 0; + return n; +} + +static void +xenbusproc(void*) +{ + Chan *c; + Aux *aux; + char *p; + struct xsd_sockmsg msg; + char buf[512]; + int n, m; + + c = namec("#x/xenstore", Aopen, ORDWR, 0); + aux = (Aux*)c->aux; + c = namec("#x/xenwatch", Aopen, OREAD, 0); + xscmd(aux, buf, XS_WATCH, NodeShutdown, "$"); + for (;;) { + xsread(c, &msg, sizeof(msg), 0); + for (n = msg.len; n > 0; n -= m) + m = xsread(c, buf, msg.len, sizeof(msg)); + buf[msg.len] = 0; + if (strcmp(buf, NodeShutdown) != 0) + continue; + p = xscmd(aux, buf, XS_READ, NodeShutdown, nil); + if (p == nil) + continue; + if (strcmp(p, "poweroff") == 0) + reboot(nil, nil, 0); + else if (strcmp(p, "reboot") == 0) + exit(0); + else { + print("xenbus: %s=%s\n", NodeShutdown, p); + xscmd(aux, buf, XS_WRITE, NodeShutdown, ""); + } + } +} diff --git a/sys/src/9/xen/dpart.c b/sys/src/9/xen/dpart.c new file mode 100644 index 000000000..17d3e4a3a --- /dev/null +++ b/sys/src/9/xen/dpart.c @@ -0,0 +1,248 @@ +#include <u.h> +#include <libc.h> +#include <disk.h> + +typedef void Fs; +#include "/sys/src/boot/pc/dosfs.h" + +enum { + Npart = 32 +}; + +#define GSHORT(p) (((p)[1]<<8)|(p)[0]) +#define GLONG(p) ((GSHORT(p+2)<<16)|GSHORT(p)) + +int +readdisk(Disk *d, void *buf, vlong off, int len) +{ + if(seek(d->fd, off, 0) == -1 + || read(d->fd, buf, len) != len) + return -1; + return 0; +} + +void +addpart(Disk *d, char *name, ulong s, ulong e) +{ + print("%s: part %s %lud %lud\n", d->prefix, name, s, e); + fprint(d->ctlfd, "part %s %lud %lud\n", name, s, e); +} + +int +isdos(int t) +{ + return t==FAT12 || t==FAT16 || t==FATHUGE || t==FAT32 || t==FAT32X; +} + +int +isextend(int t) +{ + return t==EXTEND || t==EXTHUGE || t==LEXTEND; +} + + + +/* build a cdboot partition if there is an embedded boot floppy image */ +int +cdpart(Disk *d) +{ + uchar buf[2048]; + ulong a, n; + uchar *p; + + if(readdisk(d, buf, 17*2048, 2048) == -1 + || strcmp((char*)buf+1, "CD001\x01EL TORITO SPECIFICATION") != 0) + return 0; + + p = buf + 0x47; + a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + + if(readdisk(d, buf, a*2048, 2048) == -1 + || memcmp((char*)buf, "\x01\x00\x00\x00", 4) != 0 + || memcmp((char*)buf+30, "\x55\xAA", 2) != 0 + || buf[0x20] != 0x88) + return 0; + + p = buf+0x28; + a = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24); + + switch(buf[0x21]) { + case 1: n = 1200*1024; break; + case 2: n = 1440*1024; break; + case 3: n = 2880*1024; break; + default: return 0; + } + + a = a * (uvlong)2048 / d->secsize; + n /= d->secsize; + addpart(d, "cdboot", a, a+n); + return 1; +} + +int +p9part(Disk *d, char *name, ulong pstart) +{ + char partbuf[512]; + char *field[4], *line[Npart+1], *name2; + ulong start, end; + int i, n; + + name2 = smprint("%s%s", d->prefix, name); + d = opendisk(name2, 1, 0); + if(!d) { + fprint(2, "%s: %r\n", name2); + free(name2); + return 0; + } + free(name2); + + if(readdisk(d, partbuf, 512, sizeof partbuf) == -1) + return 0; + partbuf[sizeof partbuf - 1] = '\0'; + if(strncmp(partbuf, "part ", 5) != 0 + || (n = getfields(partbuf, line, Npart+1, 0, "\n")) == 0) + return 0; + for(i = 0; i < n; i++) { + if(strncmp(line[i], "part ", 5) != 0) + break; + if(getfields(line[i], field, 4, 0, " ") != 4) + break; + start = strtoul(field[2], 0, 0); + end = strtoul(field[3], 0, 0); + if(start >= end) + break; + addpart(d, field[1], pstart+start, pstart+end); + } + return 0; +} + +int +mbrpart(Disk *d) +{ + uchar mbrbuf[512]; + char name[10]; + Dospart *dp; + ulong taboffset, start, end; + ulong firstxpart, nxtxpart; + int i, nplan9, havedos; + +#define readmbr() \ + if(readdisk(d, mbrbuf, (uvlong)taboffset*512, sizeof mbrbuf) == -1 \ + || mbrbuf[0x1FE] != 0x55 || mbrbuf[0x1FF] != 0xAA) \ + return 0 + + if(d->secsize > 512) + return 0; + dp = (Dospart*)&mbrbuf[0x1BE]; + taboffset = 0; + + if(1) { + /* get the MBR (allowing for DMDDO) */ + readmbr(); + for(i = 0; i < 4; i++) { + if(dp[i].type == DMDDO) { + taboffset = 63; + readmbr(); + i = -1; /* start over */ + } + } + } + + /* + * Read the partitions, first from the MBR and then + * from successive extended partition tables. + */ + nplan9 = 0; + havedos = 0; + firstxpart = 0; + for(;;) { + readmbr(); + nxtxpart = 0; + for(i = 0; i < 4; i++) { + /* partition offsets are relative to taboffset */ + start = taboffset+GLONG(dp[i].start); + end = start+GLONG(dp[i].len); + if(dp[i].type == PLAN9) { + if(nplan9 == 0) + strcpy(name, "plan9"); + else + sprint(name, "plan9.%d", nplan9); + addpart(d, name, start, end); + p9part(d, name, start); + nplan9++; + } + + if(!havedos && isdos(dp[i].type)) { + havedos = 1; + addpart(d, "dos", start, end); + } + + /* nxtxpart is relative to firstxpart (or 0), not taboffset */ + if(isextend(dp[i].type)) + nxtxpart = start-taboffset+firstxpart; + } + if(!nxtxpart) + break; + if(!firstxpart) + firstxpart = nxtxpart; + taboffset = nxtxpart; + } + return nplan9 + havedos; +} + +void +partall(void) +{ + Disk *d; + Dir *ent; + char *name; + int fd, i, n; + + fd = open("#S", OREAD); + if(fd == -1) { + fprint(2, "No disk\n"); + return; + } + + while((n = dirread(fd, &ent)) > 0) { + for(i = 0; i < n; i++) { + if(ent[i].mode & DMDIR) { + name = smprint("#S/%s/data", ent[i].name); + d = opendisk(name, 1, 0); + if(!d) { + fprint(2, "%s: %r\n", name); + continue; + } + // XXX not safe yet: if(!mbrpart(d) && !cdpart(d) && !p9part(d, "data", 0)) + if(!mbrpart(d) && !cdpart(d)) + fprint(2, "%s: no partitions\n", name); + close(d->fd); + } + } + } + close(fd); +} + + +void +main(int argc, char **argv) +{ + USED(argc, argv); + + fmtinstall('r', errfmt); + + bind("#c", "/dev", MBEFORE); + open("/dev/cons", OREAD); + open("/dev/cons", OWRITE); + open("/dev/cons", OWRITE); + + partall(); + + close(0); + close(1); + close(2); + + exec("/boot/boot2", argv); + + exits(0); +} diff --git a/sys/src/9/xen/etherxen.c b/sys/src/9/xen/etherxen.c new file mode 100644 index 000000000..143f254dc --- /dev/null +++ b/sys/src/9/xen/etherxen.c @@ -0,0 +1,494 @@ +/* + * Xen virtual network interface frontend + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "etherif.h" + +#define LOG(a) + +enum { + Nvif = 4, + Ntb = 16, + Nrb = 32, +}; + +typedef struct Ctlr Ctlr; +typedef union Txframe Txframe; +typedef union Rxframe Rxframe; + +struct Ctlr { + int attached; + int backend; + int vifno; + int evtchn; + int rxcopy; + Txframe *txframes; + Txframe *freetxframe; + Rxframe *rxframes; + netif_tx_front_ring_t txring; + netif_rx_front_ring_t rxring; + int *txrefs; + int *rxrefs; + int txringref; + int rxringref; + Lock txlock; + QLock attachlock; + Rendez wtxframe; + Rendez wtxblock; + + ulong interrupts; + ulong transmits; + ulong receives; + ulong txerrors; + ulong rxerrors; + ulong rxoverflows; +}; + +union Txframe { + struct { + Txframe *next; + char data[2]; + } tf; + uchar page[BY2PG]; +}; + +union Rxframe { + uchar page[BY2PG]; +}; + +static int nvif; + +/* + * conversions to machine page numbers, pages and addresses + */ +#define MFN(pa) (patomfn[(pa)>>PGSHIFT]) +#define MFNPG(pa) (MFN(pa)<<PGSHIFT) +#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa)) +#define VA2MA(va) PA2MA(PADDR(va)) + +static int +puttxrequest(Ctlr *ctlr, netif_tx_request_t *tr) +{ + netif_tx_request_t *req; + int i, notify; + + LOG(dprint("puttxrequest id %d ref %d size %d\n", tr->id, tr->gref, tr->size);) + i = ctlr->txring.req_prod_pvt; + req = RING_GET_REQUEST(&ctlr->txring, i); + memmove(req, tr, sizeof(*req)); + ctlr->txring.req_prod_pvt = i+1; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->txring, notify); + return notify; +} + +static int +putrxrequest(Ctlr *ctlr, netif_rx_request_t *rr) +{ + netif_rx_request_t *req; + int i; + int notify; + + LOG(dprint("putrxrequest %d %d\n", rr->id, rr->gref);) + i = ctlr->rxring.req_prod_pvt; + req = RING_GET_REQUEST(&ctlr->rxring, i); + memmove(req, rr, sizeof(*req)); + ctlr->rxring.req_prod_pvt = i+1; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->rxring, notify); + return notify; +} + +static int +gettxresponse(Ctlr *ctlr, netif_tx_response_t *tr) +{ + int i, avail; + netif_tx_response_t *rx; + + RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->txring, avail); + if (!avail) + return 0; + i = ctlr->txring.rsp_cons; + rx = RING_GET_RESPONSE(&ctlr->txring, i); + LOG(dprint("gettxresponse id %d status %d\n", rx->id, rx->status);) + if(rx->status) + ctlr->txerrors++; + *tr = *rx; + ctlr->txring.rsp_cons = ++i; + return 1; +} + +static int +getrxresponse(Ctlr *ctlr, netif_rx_response_t* rr) +{ + int i, avail; + netif_rx_response_t *rx; + + RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->rxring, avail); + if (!avail) + return 0; + i = ctlr->rxring.rsp_cons; + rx = RING_GET_RESPONSE(&ctlr->rxring, i); + LOG(dprint("getrxresponse id %d offset %d flags %ux status %d\n", rx->id, rx->offset, rx->flags, rx->status);) + *rr = *rx; + ctlr->rxring.rsp_cons = ++i; + return 1; +} + +static int +ringinit(Ctlr *ctlr, char *a) +{ + netif_tx_sring_t *txr; + netif_rx_sring_t *rxr; + + txr = (netif_tx_sring_t*)a; + memset(txr, 0, BY2PG); + SHARED_RING_INIT(txr); + FRONT_RING_INIT(&ctlr->txring, txr, BY2PG); + ctlr->txringref = shareframe(ctlr->backend, txr, 1); + + rxr = (netif_rx_sring_t*)(a+BY2PG); + SHARED_RING_INIT(rxr); + FRONT_RING_INIT(&ctlr->rxring, rxr, BY2PG); + ctlr->rxringref = shareframe(ctlr->backend, rxr, 1); + + return 2*BY2PG; +} + +static int +vifsend(Ctlr *ctlr, Block *bp) +{ + netif_tx_request_t tr; + Txframe *tx; + int id; + + ilock(&ctlr->txlock); + tx = ctlr->freetxframe; + ctlr->freetxframe = tx->tf.next; + iunlock(&ctlr->txlock); + id = tx - ctlr->txframes; + tr.gref = ctlr->txrefs[id]; + tr.offset = tx->tf.data - (char*)tx; + tr.flags = 0; // XXX checksum? + tr.id = id; + tr.size = BLEN(bp); + memmove(tx->tf.data, bp->rp, tr.size); + return puttxrequest(ctlr, &tr); +} + +static int +vifsenddone(Ctlr *ctlr, netif_tx_response_t *tr) +{ + Txframe *tx; + + tx = &ctlr->txframes[tr->id]; // XXX check validity of id + ilock(&ctlr->txlock); + tx->tf.next = ctlr->freetxframe; + ctlr->freetxframe = tx; + iunlock(&ctlr->txlock); + return 1; +} + +static int +vifrecv(Ctlr *ctlr, Rxframe *rx) +{ + netif_rx_request_t rr; + int id; + int ref; + + id = rx - ctlr->rxframes; + if (ctlr->rxcopy) + ref = ctlr->rxrefs[id]; + else { + ref = donateframe(ctlr->backend, rx); + ctlr->rxrefs[id] = ref; + } + rr.id = id; + rr.gref = ref; + return putrxrequest(ctlr, &rr); +} + +static int +vifrecvdone(Ether *ether, netif_rx_response_t *rr) +{ + Ctlr *ctlr; + Rxframe *rx; + Block *bp; + int len; + + ctlr = ether->ctlr; + rx = &ctlr->rxframes[rr->id]; // XXX check validity of id + if (!ctlr->rxcopy) + acceptframe(ctlr->rxrefs[rr->id], rx); + if ((len = rr->status) <= 0) { + ctlr->rxerrors++; + vifrecv(ctlr, rx); + return 1; + } + if(len > sizeof(Etherpkt) || (bp = iallocb(sizeof(Etherpkt))) == nil) { + ctlr->rxoverflows++; + vifrecv(ctlr, rx); + return 1; + } + + ctlr->receives++; + memmove(bp->base, rx->page + rr->offset, len); + vifrecv(ctlr, rx); + + bp->rp = bp->base; + bp->wp = bp->rp + len; + bp->free = 0; + bp->next = 0; + bp->list = 0; + if (rr->flags & NETRXF_data_validated) + bp->flag |= Btcpck|Budpck; + etheriq(ether, bp, 1); + return 0; +} + +static int +wtxframe(void *a) +{ + return ((struct Ctlr*)a)->freetxframe != 0; +} + +static int +wtxblock(void *a) +{ + return qcanread(((struct Ether*)a)->oq); +} + +static void +etherxenproc(void *a) +{ + Ether *ether = a; + Ctlr *ctlr = ether->ctlr; + Block *bp; + int notify; + + for (;;) { + while (ctlr->freetxframe == 0) + sleep(&ctlr->wtxframe, wtxframe, ctlr); + while ((bp = qget(ether->oq)) == 0) + sleep(&ctlr->wtxblock, wtxblock, ether); + notify = vifsend(ctlr, bp); + freeb(bp); + if (notify) + xenchannotify(ctlr->evtchn); + } +} + +static void +etherxentransmit(Ether *ether) +{ + Ctlr *ctlr; + + ctlr = ether->ctlr; + ctlr->transmits++; + wakeup(&ctlr->wtxblock); +} + +static void +etherxenintr(Ureg*, void *a) +{ + Ether *ether = a; + Ctlr *ctlr = ether->ctlr; + int txnotify; + netif_tx_response_t tr; + netif_rx_response_t rr; + + ctlr->interrupts++; + txnotify = 0; + while (getrxresponse(ctlr, &rr)) + vifrecvdone(ether, &rr); + while (gettxresponse(ctlr, &tr)) { + if (vifsenddone(ctlr, &tr)) + txnotify = 1; + } + if (txnotify) + wakeup(&ctlr->wtxframe); +} + +static long +etherxenctl(Ether *ether, void *buf, long n) +{ + uchar ea[Eaddrlen]; + Cmdbuf *cb; + + cb = parsecmd(buf, n); + if(cb->nf >= 2 + && strcmp(cb->f[0], "ea")==0 + && parseether(ea, cb->f[1]) == 0){ + free(cb); + memmove(ether->ea, ea, Eaddrlen); + memmove(ether->addr, ether->ea, Eaddrlen); + return 0; + } + free(cb); + error(Ebadctl); + return -1; /* not reached */ +} + +static void +backendconnect(Ctlr *ctlr) +{ + char dir[64]; + char buf[64]; + + sprint(dir, "device/vif/%d/", ctlr->vifno); + xenstore_setd(dir, "state", XenbusStateInitialising); + xenstore_setd(dir, "tx-ring-ref", ctlr->txringref); + xenstore_setd(dir, "rx-ring-ref", ctlr->rxringref); + xenstore_setd(dir, "event-channel", ctlr->evtchn); + print("etherxen: request-rx-copy=%d\n", ctlr->rxcopy); + if (ctlr->rxcopy) + xenstore_setd(dir, "request-rx-copy", 1); + xenstore_setd(dir, "state", XenbusStateConnected); + xenstore_gets(dir, "backend", buf, sizeof buf); + sprint(dir, "%s/", buf); + HYPERVISOR_yield(); + xenstore_gets(dir, "state", buf, sizeof buf); + while (strtol(buf, 0, 0) != XenbusStateConnected) { + print("etherxen: waiting for vif %d to connect\n", ctlr->vifno); + tsleep(&up->sleep, return0, 0, 1000); + xenstore_gets(dir, "state", buf, sizeof buf); + } +} + +static void +etherxenattach(Ether *ether) +{ + Ctlr *ctlr; + char *p; + Txframe *tx; + int npage, i; + + LOG(dprint("etherxenattach\n");) + ctlr = ether->ctlr; + qlock(&ctlr->attachlock); + if (ctlr->attached) { + qunlock(&ctlr->attachlock); + return; + } + + npage = 2 + Ntb + Nrb; + p = (char*)xspanalloc(npage<<PGSHIFT, BY2PG, 0); + p += ringinit(ctlr, p); + ctlr->txrefs = malloc(Ntb*sizeof(int)); + ctlr->rxrefs = malloc(Nrb*sizeof(int)); + ctlr->txframes = (Txframe*)p; + for (i = 0; i < Ntb; i++, p += BY2PG) { + tx = (Txframe*)p; + if (i != Ntb-1) + tx->tf.next = tx + 1; + else + tx->tf.next = 0; + ctlr->txrefs[i] = shareframe(ctlr->backend, tx, 0); + } + ctlr->freetxframe = ctlr->txframes; + ctlr->rxframes = (Rxframe*)p; + for (i = 0; i < Nrb; i++, p += BY2PG) { + if (ctlr->rxcopy) + ctlr->rxrefs[i] = shareframe(ctlr->backend, (Rxframe*)p, 1); + vifrecv(ctlr, (Rxframe*)p); + } + + ctlr->evtchn = xenchanalloc(ctlr->backend); + intrenable(ctlr->evtchn, etherxenintr, ether, BUSUNKNOWN, "vif"); + + kproc("vif", etherxenproc, ether); + backendconnect(ctlr); + ctlr->attached = 1; + qunlock(&ctlr->attachlock); +} + +static void +etherxenmulticast(void* arg, uchar* addr, int on) +{ + USED(arg, addr, on); +} + +static long +ifstat(Ether* ether, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + char *buf, *p; + int l, len; + + ctlr = ether->ctlr; + if(n == 0) + return 0; + if((p = malloc(READSTR)) == nil) + error(Enomem); + l = snprint(p, READSTR, "intr: %lud\n", ctlr->interrupts); + l += snprint(p+l, READSTR-l, "transmits: %lud\n", ctlr->transmits); + l += snprint(p+l, READSTR-l, "receives: %lud\n", ctlr->receives); + l += snprint(p+l, READSTR-l, "txerrors: %lud\n", ctlr->txerrors); + l += snprint(p+l, READSTR-l, "rxerrors: %lud\n", ctlr->rxerrors); + snprint(p+l, READSTR-l, "rxoverflows: %lud\n", ctlr->rxoverflows); + + buf = a; + len = readstr(offset, buf, n, p); + free(p); + + return len; +} + +static int +pnp(Ether* ether) +{ + uchar ea[Eaddrlen]; + char dir[64]; + char buf[64]; + Ctlr *ctlr; + int domid, rxcopy; + + if (nvif > Nvif) + return -1; + sprint(dir, "device/vif/%d/", nvif); + if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0) + return -1; + domid = strtol(buf, 0, 0); + if (xenstore_gets(dir, "mac", buf, sizeof buf) <= 0) + return -1; + if (parseether(ea, buf) < 0) + return -1; + if (xenstore_gets(dir, "backend", buf, sizeof buf) <= 0) + return 1; + sprint(dir, "%s/", buf); + rxcopy = 0; + if (xenstore_gets(dir, "feature-rx-copy", buf, sizeof buf) >= 0) + rxcopy = strtol(buf, 0, 0); + ether->ctlr = ctlr = malloc(sizeof(Ctlr)); + memset(ctlr, 0, sizeof(Ctlr)); + ctlr->backend = domid; + ctlr->vifno = nvif++; + ctlr->rxcopy = rxcopy; + + memmove(ether->ea, ea, sizeof ether->ea); + ether->mbps = 100; // XXX what speed? + ether->attach = etherxenattach; + ether->detach = nil; + ether->transmit = etherxentransmit; + ether->irq = -1; + ether->tbdf = BUSUNKNOWN; + ether->interrupt = etherxenintr; + ether->ifstat = ifstat; + ether->ctl = etherxenctl; + ether->promiscuous = nil; + ether->multicast = etherxenmulticast; + ether->arg = ether; + return 0; +} + +void +etherxenlink(void) +{ + addethercard("xen", pnp); +} diff --git a/sys/src/9/xen/fns.h b/sys/src/9/xen/fns.h new file mode 100644 index 000000000..b6d0fc7a1 --- /dev/null +++ b/sys/src/9/xen/fns.h @@ -0,0 +1,164 @@ +#include "../port/portfns.h" + +Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong)); +void archinit(void); +void bootargs(ulong); +ulong cankaddr(ulong); +int cistrcmp(char*, char*); +int cistrncmp(char*, char*, int); +#define clearmmucache() /* x86 doesn't have one */ +void clockintr(Ureg*, void*); +int (*cmpswap)(long*, long, long); +int cmpswap486(long*, long, long); +void (*coherence)(void); +void cpuid(int, ulong regs[]); +int cpuidentify(void); +void cpuidprint(void); +void (*cycles)(uvlong*); +void delay(int); +#define evenaddr(x) /* x86 doesn't care */ +void fpclear(void); +void fpenv(FPsave*); +void fpinit(void); +void fpoff(void); +void (*fprestore)(FPsave*); +void (*fpsave)(FPsave*); +void fpsserestore(FPsave*); +void fpsserestore0(FPsave*); +void fpssesave(FPsave*); +void fpssesave0(FPsave*); +ulong fpstatus(void); +void fpx87restore(FPsave*); +void fpx87save(FPsave*); +ulong getcr4(void); +char* getconf(char*); +void guesscpuhz(int); +void halt(void); +void mwait(void*); +void i8042reset(void); +void i8253enable(void); +void i8253init(void); +void i8253link(void); +uvlong i8253read(uvlong*); +void i8253timerset(uvlong); +int i8259disable(int); +int i8259enable(Vctl*); +void i8259init(void); +int i8259isr(int); +void i8259on(void); +void i8259off(void); +int i8259vecno(int); +void idle(void); +void idlehands(void); +int inb(int); +void insb(int, void*, int); +ushort ins(int); +void inss(int, void*, int); +ulong inl(int); +void insl(int, void*, int); +int intrdisable(int, void (*)(Ureg *, void *), void*, int, char*); +void intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +int ioalloc(int, int, int, char*); +int isaconfig(char*, int, ISAConf*); +void kbdenable(void); +#define kmapinval() +void lgdt(ushort[3]); // XXX remove and in l.s +void lidt(ushort[3]); // XXX remove and in l.s +void links(void); +void ltr(ulong); // XXX remove? +void mach0init(void); +void mathinit(void); +void mb386(void); +void mb586(void); +void mfence(void); +void mmuflushtlb(Page*); +void mmuinit(void); +ulong mmukmap(ulong, ulong, int); +int mmukmapsync(ulong); +#define mmunewpage(x) +ulong* mmuwalk(ulong*, ulong, int, int); +int mtrr(uvlong, uvlong, char *); +int mtrrprint(char *, long); +void outb(int, int); +void outsb(int, void*, int); +void outs(int, ushort); +void outss(int, void*, int); +void outl(int, ulong); +void outsl(int, void*, int); +void printcpufreq(void); +void procrestore(Proc*); +void procsave(Proc*); +void procsetup(Proc*); +void procfork(Proc*); +void putcr4(ulong); +int rdmsr(int, vlong*); +int screenprint(char*, ...); /* debugging */ +void (*screenputs)(char*, int); +void touser(void*); +void trap(Ureg*); +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +int tas(void*); +#define userureg(ur) (((ur)->cs & 0xFFFF) == UESEL) +void vectortable(void); +int wrmsr(int, vlong); +uint xchgl(uint*, uint); +uint xchgw(ushort*, uint); +uint xchgb(uchar*, uint); + +#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1])) +#define KADDR(a) ((void*)((ulong)(a)|KZERO)) +#define PADDR(a) ((ulong)(a)&~KZERO) + +#define dcflush(a, b) + +/* Xen functions */ +#define rmb() coherence() +#define wmb() coherence() +void mb(void); +void hypervisor_callback(void), failsafe_callback(void); +void xenconsinit(void); +ulong mmumapframe(ulong, ulong); +void mmumapcpu0(void); +void dprint(char *, ...); +void xenupdate(ulong *ptr, ulong val); +void xenupdatema(ulong *ptr, uvlong val); +int xenpdptpin(ulong va); +int xenpgdpin(ulong va); +int xenptpin(ulong va); +void xenptunpin(ulong va); +void xenptswitch(ulong pa); +void xentlbflush(void); +int ffs(ulong); +void xengrantinit(void); +int xengrant(domid_t domid, ulong frame, int flags); +int xengrantend(int ref); +void acceptframe(int ref, void *va); +int donateframe(int domid, void *va); +int shareframe(int domid, void *va, int write); +void xenchannotify(int); +void xenupcall(Ureg*); +ulong xenwallclock(void); +int xenstore_read(char*, char*, int); +void xenstore_write(char*, char*); +void xenstore_setd(char *dir, char *node, int value); +int xenstore_gets(char *dir, char *node, char *buf, int buflen); +int xenchanalloc(int); + +long HYPERVISOR_set_timer_op(uvlong timeout); +int HYPERVISOR_set_trap_table(trap_info_t *table); +int HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count, domid_t domid); +int HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid); +int HYPERVISOR_set_gdt(ulong *frame_list, int entries); +int HYPERVISOR_stack_switch(ulong ss, ulong esp); +int HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc); +int HYPERVISOR_fpu_taskswitch(void); +int HYPERVISOR_yield(void); +int HYPERVISOR_block(void); +int HYPERVISOR_shutdown(int); +int HYPERVISOR_multicall(void *call_list, int nr_calls); +int HYPERVISOR_event_channel_op(void *op); +int HYPERVISOR_xen_version(int cmd, void *arg); +int HYPERVISOR_console_io(int cmd, int count, char *str); +int HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count); +int HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg); diff --git a/sys/src/9/xen/l.s b/sys/src/9/xen/l.s new file mode 100644 index 000000000..ff9515c33 --- /dev/null +++ b/sys/src/9/xen/l.s @@ -0,0 +1,678 @@ +#include "xendefs.h" +#include "mem.h" + +/* + * Some machine instructions not handled by 8[al]. + */ +#define OP16 BYTE $0x66 +#define DELAY BYTE $0xEB; BYTE $0x00 /* JMP .+2 */ +#define CPUID BYTE $0x0F; BYTE $0xA2 /* CPUID, argument in AX */ +#define WRMSR BYTE $0x0F; BYTE $0x30 /* WRMSR, argument in AX/DX (lo/hi) */ +#define RDTSC BYTE $0x0F; BYTE $0x31 /* RDTSC, result in AX/DX (lo/hi) */ +#define RDMSR BYTE $0x0F; BYTE $0x32 /* RDMSR, result in AX/DX (lo/hi) */ +#define HLT BYTE $0xF4 +#define BSFL BYTE $0xf; BYTE $0xbc; BYTE $0xc0 /* bsfl AX,AX */ + +/* + * Macros for calculating offsets within the page directory base + * and page tables. Note that these are assembler-specific hence + * the '<<2'. + */ +#define PDO(a) (((((a))>>22) & 0x03FF)<<2) +#define PTO(a) (((((a))>>12) & 0x03FF)<<2) + +/* + * Entry point from XEN's "linux" builder. + * At this point RAM from 0..4M ("physical") is mapped at KZERO, + * the kernel is loaded, we're running on a boot stack and a + * boot page table. The start_info structure describes the + * situation, and is pointed to by SI. + * The stack is the highest used address. + */ +TEXT _start(SB), $0 + MOVL SP, xentop(SB) /* set global for top of mapped region */ + MOVL SI, xenstart(SB) /* set global to start_info_t */ + MOVL $0, AX /* clear EFLAGS */ + PUSHL AX + POPFL + CALL mmumapcpu0(SB) /* make mapping before using stack */ + MOVL $(MACHADDR+MACHSIZE-4), SP /* set stack */ + CALL main(SB) + +/* + * Park a processor. Should never fall through a return from main to here, + * should only be called by application processors when shutting down. + */ +TEXT idle(SB), $0 +_idle: + STI + HLT + JMP _idle + +/* + * Read/write various system registers. + * CR4 and the 'model specific registers' should only be read/written + * after it has been determined the processor supports them + */ +TEXT lgdt(SB), $0 /* GDTR - global descriptor table */ + MOVL gdtptr+0(FP), AX + MOVL (AX), GDTR + RET + +TEXT lidt(SB), $0 /* IDTR - interrupt descriptor table */ + MOVL idtptr+0(FP), AX + MOVL (AX), IDTR + RET + +TEXT ltr(SB), $0 /* TR - task register */ + MOVL tptr+0(FP), AX + MOVW AX, TASK + RET + +TEXT rtsr(SB), $0 + MOVW TASK, AX + RET + +TEXT _cycles(SB), $0 /* time stamp counter; cycles since power up */ + RDTSC + MOVL vlong+0(FP), CX /* &vlong */ + MOVL AX, 0(CX) /* lo */ + MOVL DX, 4(CX) /* hi */ + RET + +TEXT rdmsr(SB), $0 /* model-specific register */ + MOVL index+0(FP), CX + RDMSR + MOVL vlong+4(FP), CX /* &vlong */ + MOVL AX, 0(CX) /* lo */ + MOVL DX, 4(CX) /* hi */ + RET + +TEXT wrmsr(SB), $0 + MOVL index+0(FP), CX + MOVL lo+4(FP), AX + MOVL hi+8(FP), DX +/* Xen doesn't let us do this + WRMSR + */ + RET + +/* + * Try to determine the CPU type which requires fiddling with EFLAGS. + * If the Id bit can be toggled then the CPUID instruction can be used + * to determine CPU identity and features. First have to check if it's + * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be + * toggled then it's an older 486 of some kind. + * + * cpuid(fun, regs[4]); + */ +TEXT cpuid(SB), $0 + MOVL $0x240000, AX + PUSHL AX + POPFL /* set Id|Ac */ + PUSHFL + POPL BX /* retrieve value */ + MOVL $0, AX + PUSHL AX + POPFL /* clear Id|Ac, EFLAGS initialised */ + PUSHFL + POPL AX /* retrieve value */ + XORL BX, AX + TESTL $0x040000, AX /* Ac */ + JZ _cpu386 /* can't set this bit on 386 */ + TESTL $0x200000, AX /* Id */ + JZ _cpu486 /* can't toggle this bit on some 486 */ + MOVL fn+0(FP), AX + CPUID + JMP _cpuid +_cpu486: + MOVL $0x400, AX + JMP _maybezapax +_cpu386: + MOVL $0x300, AX +_maybezapax: + CMPL fn+0(FP), $1 + JE _zaprest + XORL AX, AX +_zaprest: + XORL BX, BX + XORL CX, CX + XORL DX, DX +_cpuid: + MOVL regs+4(FP), BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET + +/* + * Floating point. + * Note: the encodings for the FCLEX, FINIT, FSAVE, FSTCW, FSENV and FSTSW + * instructions do NOT have the WAIT prefix byte (i.e. they act like their + * FNxxx variations) so WAIT instructions must be explicitly placed in the + * code as necessary. + */ +#define FPOFF(l) ;\ + MOVL CR0, AX ;\ + ANDL $0xC, AX /* EM, TS */ ;\ + CMPL AX, $0x8 ;\ + JEQ l ;\ + WAIT ;\ +l: ;\ + MOVL CR0, AX ;\ + ANDL $~0x4, AX /* EM=0 */ ;\ + ORL $0x28, AX /* NE=1, TS=1 */ ;\ + MOVL AX, CR0 + +#define FPON ;\ + MOVL CR0, AX ;\ + ANDL $~0xC, AX /* EM=0, TS=0 */ ;\ + MOVL AX, CR0 + +TEXT fpoff(SB), $0 /* disable */ + FPOFF(l1) + RET + +TEXT fpinit(SB), $0 /* enable and init */ + FPON + FINIT + WAIT + /* setfcr(FPPDBL|FPRNR|FPINVAL|FPZDIV|FPOVFL) */ + /* note that low 6 bits are masks, not enables, on this chip */ + PUSHW $0x0232 + FLDCW 0(SP) + POPW AX + WAIT + RET + +TEXT fpx87save(SB), $0 /* save state and disable */ + MOVL p+0(FP), AX + FSAVE 0(AX) /* no WAIT */ + FPOFF(l2) + RET + +TEXT fpx87restore(SB), $0 /* enable and restore state */ + FPON + MOVL p+0(FP), AX + FRSTOR 0(AX) + WAIT + RET + +TEXT fpstatus(SB), $0 /* get floating point status */ + FSTSW AX + RET + +TEXT fpenv(SB), $0 /* save state without waiting */ + MOVL p+0(FP), AX + FSTENV 0(AX) + RET + +TEXT fpclear(SB), $0 /* clear pending exceptions */ + FPON + FCLEX /* no WAIT */ + FPOFF(l3) + RET + +/* + * Test-And-Set + */ +TEXT tas(SB), $0 + MOVL $0xDEADDEAD, AX + MOVL lock+0(FP), BX + XCHGL AX, (BX) /* lock->key */ + RET + +TEXT _xinc(SB), $0 /* void _xinc(long*); */ + MOVL l+0(FP), AX + LOCK; INCL 0(AX) + RET + +TEXT _xdec(SB), $0 /* long _xdec(long*); */ + MOVL l+0(FP), BX + XORL AX, AX + LOCK; DECL 0(BX) + JLT _xdeclt + JGT _xdecgt + RET +_xdecgt: + INCL AX + RET +_xdeclt: + DECL AX + RET + +TEXT getstack(SB), $0 + MOVL SP, AX + RET +TEXT mb386(SB), $0 + POPL AX /* return PC */ + PUSHFL + PUSHL CS + PUSHL AX + IRETL + +TEXT mb586(SB), $0 + XORL AX, AX + CPUID + RET + +TEXT sfence(SB), $0 + BYTE $0x0f + BYTE $0xae + BYTE $0xf8 + RET + +TEXT lfence(SB), $0 + BYTE $0x0f + BYTE $0xae + BYTE $0xe8 + RET + +TEXT mfence(SB), $0 + BYTE $0x0f + BYTE $0xae + BYTE $0xf0 + RET + + +TEXT xchgw(SB), $0 + MOVL v+4(FP), AX + MOVL p+0(FP), BX + XCHGW AX, (BX) + RET + +TEXT xchgb(SB), $0 + MOVL v+4(FP), AX + MOVL p+0(FP), BX + XCHGB AX, (BX) + RET + +TEXT xchgl(SB), $0 + MOVL v+4(FP), AX + MOVL p+0(FP), BX + XCHGL AX, (BX) + RET + +/* Return the position of the first bit set. Undefined if zero. */ +TEXT ffs(SB), $0 + MOVL v+0(FP), AX + BSFL + RET + +TEXT cmpswap486(SB), $0 + MOVL addr+0(FP), BX + MOVL old+4(FP), AX + MOVL new+8(FP), CX + LOCK + BYTE $0x0F; BYTE $0xB1; BYTE $0x0B /* CMPXCHGL CX, (BX) */ + JNZ didnt + MOVL $1, AX + RET +didnt: + XORL AX,AX + RET + +TEXT mul64fract(SB), $0 + MOVL r+0(FP), CX + XORL BX, BX /* BX = 0 */ + + MOVL a+8(FP), AX + MULL b+16(FP) /* a1*b1 */ + MOVL AX, 4(CX) /* r2 = lo(a1*b1) */ + + MOVL a+8(FP), AX + MULL b+12(FP) /* a1*b0 */ + MOVL AX, 0(CX) /* r1 = lo(a1*b0) */ + ADDL DX, 4(CX) /* r2 += hi(a1*b0) */ + + MOVL a+4(FP), AX + MULL b+16(FP) /* a0*b1 */ + ADDL AX, 0(CX) /* r1 += lo(a0*b1) */ + ADCL DX, 4(CX) /* r2 += hi(a0*b1) + carry */ + + MOVL a+4(FP), AX + MULL b+12(FP) /* a0*b0 */ + ADDL DX, 0(CX) /* r1 += hi(a0*b0) */ + ADCL BX, 4(CX) /* r2 += carry */ + RET + +/* + * label consists of a stack pointer and a PC + */ +TEXT gotolabel(SB), $0 + MOVL label+0(FP), AX + MOVL 0(AX), SP /* restore sp */ + MOVL 4(AX), AX /* put return pc on the stack */ + MOVL AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), $0 + MOVL label+0(FP), AX + MOVL SP, 0(AX) /* store sp */ + MOVL 0(SP), BX /* store return pc */ + MOVL BX, 4(AX) + MOVL $0, AX /* return 0 */ + RET + +TEXT mwait(SB), $0 + MOVL addr+0(FP), AX + MOVL (AX), CX + ORL CX, CX + JNZ _mwaitdone + XORL DX, DX + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + MOVL (AX), CX + ORL CX, CX + JNZ _mwaitdone + XORL AX, AX + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ +_mwaitdone: + RET + +/* + * Interrupt/exception handling. + * Each entry in the vector table calls either _strayintr or _strayintrx depending + * on whether an error code has been automatically pushed onto the stack + * (_strayintrx) or not, in which case a dummy entry must be pushed before retrieving + * the trap type from the vector table entry and placing it on the stack as part + * of the Ureg structure. + * Exceptions to this are the syscall vector and the page fault + * vector. Syscalls are dispatched seperately. Page faults + * have to take care of the extra cr2 parameter that xen places + * at the top of the stack. + * The size of each entry in the vector table (6 bytes) is known in trapinit(). + */ +TEXT _strayintr(SB), $0 + PUSHL AX /* save AX */ + MOVL 4(SP), AX /* return PC from vectortable(SB) */ + JMP intrcommon + +TEXT _strayintrx(SB), $0 + XCHGL AX, (SP) /* swap AX with vectortable CALL PC */ +intrcommon: + PUSHL DS /* save DS */ + PUSHL $(KDSEL) + POPL DS /* fix up DS */ + MOVBLZX (AX), AX /* trap type -> AX */ + XCHGL AX, 4(SP) /* exchange trap type with saved AX */ + + PUSHL ES /* save ES */ + PUSHL $(KDSEL) + POPL ES /* fix up ES */ + + PUSHL FS /* save the rest of the Ureg struct */ + PUSHL GS + PUSHAL + + PUSHL SP /* Ureg* argument to trap */ + CALL trap(SB) + +TEXT forkret(SB), $0 + POPL AX + POPAL + POPL GS + POPL FS + POPL ES + POPL DS + ADDL $8, SP /* pop error code and trap type */ + IRETL + +TEXT vectortable(SB), $0 + CALL _strayintr(SB); BYTE $0x00 /* divide error */ + CALL _strayintr(SB); BYTE $0x01 /* debug exception */ + CALL _strayintr(SB); BYTE $0x02 /* NMI interrupt */ + CALL _strayintr(SB); BYTE $0x03 /* breakpoint */ + CALL _strayintr(SB); BYTE $0x04 /* overflow */ + CALL _strayintr(SB); BYTE $0x05 /* bound */ + CALL _strayintr(SB); BYTE $0x06 /* invalid opcode */ + CALL _strayintr(SB); BYTE $0x07 /* no coprocessor available */ + CALL _strayintrx(SB); BYTE $0x08 /* double fault */ + CALL _strayintr(SB); BYTE $0x09 /* coprocessor segment overflow */ + CALL _strayintrx(SB); BYTE $0x0A /* invalid TSS */ + CALL _strayintrx(SB); BYTE $0x0B /* segment not available */ + CALL _strayintrx(SB); BYTE $0x0C /* stack exception */ + CALL _strayintrx(SB); BYTE $0x0D /* general protection error */ + CALL _strayintrx(SB); BYTE $0x0E /* page fault */ + CALL _strayintr(SB); BYTE $0x0F /* */ + CALL _strayintr(SB); BYTE $0x10 /* coprocessor error */ + CALL _strayintrx(SB); BYTE $0x11 /* alignment check */ + CALL _strayintr(SB); BYTE $0x12 /* machine check */ + CALL _strayintr(SB); BYTE $0x13 + CALL _strayintr(SB); BYTE $0x14 + CALL _strayintr(SB); BYTE $0x15 + CALL _strayintr(SB); BYTE $0x16 + CALL _strayintr(SB); BYTE $0x17 + CALL _strayintr(SB); BYTE $0x18 + CALL _strayintr(SB); BYTE $0x19 + CALL _strayintr(SB); BYTE $0x1A + CALL _strayintr(SB); BYTE $0x1B + CALL _strayintr(SB); BYTE $0x1C + CALL _strayintr(SB); BYTE $0x1D + CALL _strayintr(SB); BYTE $0x1E + CALL _strayintr(SB); BYTE $0x1F + CALL _strayintr(SB); BYTE $0x20 /* VectorLAPIC */ + CALL _strayintr(SB); BYTE $0x21 + CALL _strayintr(SB); BYTE $0x22 + CALL _strayintr(SB); BYTE $0x23 + CALL _strayintr(SB); BYTE $0x24 + CALL _strayintr(SB); BYTE $0x25 + CALL _strayintr(SB); BYTE $0x26 + CALL _strayintr(SB); BYTE $0x27 + CALL _strayintr(SB); BYTE $0x28 + CALL _strayintr(SB); BYTE $0x29 + CALL _strayintr(SB); BYTE $0x2A + CALL _strayintr(SB); BYTE $0x2B + CALL _strayintr(SB); BYTE $0x2C + CALL _strayintr(SB); BYTE $0x2D + CALL _strayintr(SB); BYTE $0x2E + CALL _strayintr(SB); BYTE $0x2F + CALL _strayintr(SB); BYTE $0x30 + CALL _strayintr(SB); BYTE $0x31 + CALL _strayintr(SB); BYTE $0x32 + CALL _strayintr(SB); BYTE $0x33 + CALL _strayintr(SB); BYTE $0x34 + CALL _strayintr(SB); BYTE $0x35 + CALL _strayintr(SB); BYTE $0x36 + CALL _strayintr(SB); BYTE $0x37 + CALL _strayintr(SB); BYTE $0x38 + CALL _strayintr(SB); BYTE $0x39 + CALL _strayintr(SB); BYTE $0x3A + CALL _strayintr(SB); BYTE $0x3B + CALL _strayintr(SB); BYTE $0x3C + CALL _strayintr(SB); BYTE $0x3D + CALL _strayintr(SB); BYTE $0x3E + CALL _strayintr(SB); BYTE $0x3F + CALL _syscallintr(SB); BYTE $0x40 /* VectorSYSCALL */ + CALL _strayintr(SB); BYTE $0x41 + CALL _strayintr(SB); BYTE $0x42 + CALL _strayintr(SB); BYTE $0x43 + CALL _strayintr(SB); BYTE $0x44 + CALL _strayintr(SB); BYTE $0x45 + CALL _strayintr(SB); BYTE $0x46 + CALL _strayintr(SB); BYTE $0x47 + CALL _strayintr(SB); BYTE $0x48 + CALL _strayintr(SB); BYTE $0x49 + CALL _strayintr(SB); BYTE $0x4A + CALL _strayintr(SB); BYTE $0x4B + CALL _strayintr(SB); BYTE $0x4C + CALL _strayintr(SB); BYTE $0x4D + CALL _strayintr(SB); BYTE $0x4E + CALL _strayintr(SB); BYTE $0x4F + CALL _strayintr(SB); BYTE $0x50 + CALL _strayintr(SB); BYTE $0x51 + CALL _strayintr(SB); BYTE $0x52 + CALL _strayintr(SB); BYTE $0x53 + CALL _strayintr(SB); BYTE $0x54 + CALL _strayintr(SB); BYTE $0x55 + CALL _strayintr(SB); BYTE $0x56 + CALL _strayintr(SB); BYTE $0x57 + CALL _strayintr(SB); BYTE $0x58 + CALL _strayintr(SB); BYTE $0x59 + CALL _strayintr(SB); BYTE $0x5A + CALL _strayintr(SB); BYTE $0x5B + CALL _strayintr(SB); BYTE $0x5C + CALL _strayintr(SB); BYTE $0x5D + CALL _strayintr(SB); BYTE $0x5E + CALL _strayintr(SB); BYTE $0x5F + CALL _strayintr(SB); BYTE $0x60 + CALL _strayintr(SB); BYTE $0x61 + CALL _strayintr(SB); BYTE $0x62 + CALL _strayintr(SB); BYTE $0x63 + CALL _strayintr(SB); BYTE $0x64 + CALL _strayintr(SB); BYTE $0x65 + CALL _strayintr(SB); BYTE $0x66 + CALL _strayintr(SB); BYTE $0x67 + CALL _strayintr(SB); BYTE $0x68 + CALL _strayintr(SB); BYTE $0x69 + CALL _strayintr(SB); BYTE $0x6A + CALL _strayintr(SB); BYTE $0x6B + CALL _strayintr(SB); BYTE $0x6C + CALL _strayintr(SB); BYTE $0x6D + CALL _strayintr(SB); BYTE $0x6E + CALL _strayintr(SB); BYTE $0x6F + CALL _strayintr(SB); BYTE $0x70 + CALL _strayintr(SB); BYTE $0x71 + CALL _strayintr(SB); BYTE $0x72 + CALL _strayintr(SB); BYTE $0x73 + CALL _strayintr(SB); BYTE $0x74 + CALL _strayintr(SB); BYTE $0x75 + CALL _strayintr(SB); BYTE $0x76 + CALL _strayintr(SB); BYTE $0x77 + CALL _strayintr(SB); BYTE $0x78 + CALL _strayintr(SB); BYTE $0x79 + CALL _strayintr(SB); BYTE $0x7A + CALL _strayintr(SB); BYTE $0x7B + CALL _strayintr(SB); BYTE $0x7C + CALL _strayintr(SB); BYTE $0x7D + CALL _strayintr(SB); BYTE $0x7E + CALL _strayintr(SB); BYTE $0x7F + CALL _strayintr(SB); BYTE $0x80 /* Vector[A]PIC */ + CALL _strayintr(SB); BYTE $0x81 + CALL _strayintr(SB); BYTE $0x82 + CALL _strayintr(SB); BYTE $0x83 + CALL _strayintr(SB); BYTE $0x84 + CALL _strayintr(SB); BYTE $0x85 + CALL _strayintr(SB); BYTE $0x86 + CALL _strayintr(SB); BYTE $0x87 + CALL _strayintr(SB); BYTE $0x88 + CALL _strayintr(SB); BYTE $0x89 + CALL _strayintr(SB); BYTE $0x8A + CALL _strayintr(SB); BYTE $0x8B + CALL _strayintr(SB); BYTE $0x8C + CALL _strayintr(SB); BYTE $0x8D + CALL _strayintr(SB); BYTE $0x8E + CALL _strayintr(SB); BYTE $0x8F + CALL _strayintr(SB); BYTE $0x90 + CALL _strayintr(SB); BYTE $0x91 + CALL _strayintr(SB); BYTE $0x92 + CALL _strayintr(SB); BYTE $0x93 + CALL _strayintr(SB); BYTE $0x94 + CALL _strayintr(SB); BYTE $0x95 + CALL _strayintr(SB); BYTE $0x96 + CALL _strayintr(SB); BYTE $0x97 + CALL _strayintr(SB); BYTE $0x98 + CALL _strayintr(SB); BYTE $0x99 + CALL _strayintr(SB); BYTE $0x9A + CALL _strayintr(SB); BYTE $0x9B + CALL _strayintr(SB); BYTE $0x9C + CALL _strayintr(SB); BYTE $0x9D + CALL _strayintr(SB); BYTE $0x9E + CALL _strayintr(SB); BYTE $0x9F + CALL _strayintr(SB); BYTE $0xA0 + CALL _strayintr(SB); BYTE $0xA1 + CALL _strayintr(SB); BYTE $0xA2 + CALL _strayintr(SB); BYTE $0xA3 + CALL _strayintr(SB); BYTE $0xA4 + CALL _strayintr(SB); BYTE $0xA5 + CALL _strayintr(SB); BYTE $0xA6 + CALL _strayintr(SB); BYTE $0xA7 + CALL _strayintr(SB); BYTE $0xA8 + CALL _strayintr(SB); BYTE $0xA9 + CALL _strayintr(SB); BYTE $0xAA + CALL _strayintr(SB); BYTE $0xAB + CALL _strayintr(SB); BYTE $0xAC + CALL _strayintr(SB); BYTE $0xAD + CALL _strayintr(SB); BYTE $0xAE + CALL _strayintr(SB); BYTE $0xAF + CALL _strayintr(SB); BYTE $0xB0 + CALL _strayintr(SB); BYTE $0xB1 + CALL _strayintr(SB); BYTE $0xB2 + CALL _strayintr(SB); BYTE $0xB3 + CALL _strayintr(SB); BYTE $0xB4 + CALL _strayintr(SB); BYTE $0xB5 + CALL _strayintr(SB); BYTE $0xB6 + CALL _strayintr(SB); BYTE $0xB7 + CALL _strayintr(SB); BYTE $0xB8 + CALL _strayintr(SB); BYTE $0xB9 + CALL _strayintr(SB); BYTE $0xBA + CALL _strayintr(SB); BYTE $0xBB + CALL _strayintr(SB); BYTE $0xBC + CALL _strayintr(SB); BYTE $0xBD + CALL _strayintr(SB); BYTE $0xBE + CALL _strayintr(SB); BYTE $0xBF + CALL _strayintr(SB); BYTE $0xC0 + CALL _strayintr(SB); BYTE $0xC1 + CALL _strayintr(SB); BYTE $0xC2 + CALL _strayintr(SB); BYTE $0xC3 + CALL _strayintr(SB); BYTE $0xC4 + CALL _strayintr(SB); BYTE $0xC5 + CALL _strayintr(SB); BYTE $0xC6 + CALL _strayintr(SB); BYTE $0xC7 + CALL _strayintr(SB); BYTE $0xC8 + CALL _strayintr(SB); BYTE $0xC9 + CALL _strayintr(SB); BYTE $0xCA + CALL _strayintr(SB); BYTE $0xCB + CALL _strayintr(SB); BYTE $0xCC + CALL _strayintr(SB); BYTE $0xCD + CALL _strayintr(SB); BYTE $0xCE + CALL _strayintr(SB); BYTE $0xCF + CALL _strayintr(SB); BYTE $0xD0 + CALL _strayintr(SB); BYTE $0xD1 + CALL _strayintr(SB); BYTE $0xD2 + CALL _strayintr(SB); BYTE $0xD3 + CALL _strayintr(SB); BYTE $0xD4 + CALL _strayintr(SB); BYTE $0xD5 + CALL _strayintr(SB); BYTE $0xD6 + CALL _strayintr(SB); BYTE $0xD7 + CALL _strayintr(SB); BYTE $0xD8 + CALL _strayintr(SB); BYTE $0xD9 + CALL _strayintr(SB); BYTE $0xDA + CALL _strayintr(SB); BYTE $0xDB + CALL _strayintr(SB); BYTE $0xDC + CALL _strayintr(SB); BYTE $0xDD + CALL _strayintr(SB); BYTE $0xDE + CALL _strayintr(SB); BYTE $0xDF + CALL _strayintr(SB); BYTE $0xE0 + CALL _strayintr(SB); BYTE $0xE1 + CALL _strayintr(SB); BYTE $0xE2 + CALL _strayintr(SB); BYTE $0xE3 + CALL _strayintr(SB); BYTE $0xE4 + CALL _strayintr(SB); BYTE $0xE5 + CALL _strayintr(SB); BYTE $0xE6 + CALL _strayintr(SB); BYTE $0xE7 + CALL _strayintr(SB); BYTE $0xE8 + CALL _strayintr(SB); BYTE $0xE9 + CALL _strayintr(SB); BYTE $0xEA + CALL _strayintr(SB); BYTE $0xEB + CALL _strayintr(SB); BYTE $0xEC + CALL _strayintr(SB); BYTE $0xED + CALL _strayintr(SB); BYTE $0xEE + CALL _strayintr(SB); BYTE $0xEF + CALL _strayintr(SB); BYTE $0xF0 + CALL _strayintr(SB); BYTE $0xF1 + CALL _strayintr(SB); BYTE $0xF2 + CALL _strayintr(SB); BYTE $0xF3 + CALL _strayintr(SB); BYTE $0xF4 + CALL _strayintr(SB); BYTE $0xF5 + CALL _strayintr(SB); BYTE $0xF6 + CALL _strayintr(SB); BYTE $0xF7 + CALL _strayintr(SB); BYTE $0xF8 + CALL _strayintr(SB); BYTE $0xF9 + CALL _strayintr(SB); BYTE $0xFA + CALL _strayintr(SB); BYTE $0xFB + CALL _strayintr(SB); BYTE $0xFC + CALL _strayintr(SB); BYTE $0xFD + CALL _strayintr(SB); BYTE $0xFE + CALL _strayintr(SB); BYTE $0xFF diff --git a/sys/src/9/xen/main.c b/sys/src/9/xen/main.c new file mode 100644 index 000000000..966b39997 --- /dev/null +++ b/sys/src/9/xen/main.c @@ -0,0 +1,801 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "init.h" +#include "pool.h" +#include "reboot.h" +#include <tos.h> + +Mach *m; + +#define BOOTARGS (xenstart->cmd_line) +#define BOOTARGSLEN (sizeof xenstart->cmd_line) +#define MAXCONF 64 + +enum { + /* space for syscall args, return PC, top-of-stack struct */ + Ustkheadroom = sizeof(Sargs) + sizeof(uintptr) + sizeof(Tos), +}; + +char bootdisk[KNAMELEN]; +Conf conf; +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; +uchar *sp; /* user stack of init proc */ +int idle_spin; + +static void +options(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. + */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getfields(cp, line, MAXCONF, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +void +main(void) +{ + mach0init(); + options(); + quotefmtinstall(); + xenconsinit(); + + //consdebug = rdb; + print("\nPlan 9 (%s)\n", xenstart->magic); + + cpuidentify(); + // meminit() is not for us + confinit(); + archinit(); + xinit(); + trapinit(); + printinit(); + cpuidprint(); + mmuinit(); + if(arch->intrinit) /* launches other processors on an mp */ + arch->intrinit(); + timersinit(); + mathinit(); + kbdenable(); + xengrantinit(); + if(arch->clockenable) + arch->clockenable(); + procinit0(); + initseg(); + + links(); +// conf.monitor = 1; + chandevreset(); + pageinit(); + + swapinit(); + userinit(); + active.thunderbirdsarego = 1; + schedinit(); +} + +void +mach0init(void) +{ + m = (Mach*)MACHADDR; + m->machno = 0; + conf.nmach = 1; + MACHP(0) = (Mach*)CPU0MACH; + m->pdb = (ulong*)xenstart->pt_base; +#ifdef NOT + m->gdt = (Segdesc*)CPU0GDT; +#endif + + machinit(); + + active.machs = 1; + active.exiting = 0; +} + +void +machinit(void) +{ + int machno; + ulong *pdb; + Segdesc *gdt; + + machno = m->machno; + pdb = m->pdb; + gdt = m->gdt; + memset(m, 0, sizeof(Mach)); + m->machno = machno; + m->pdb = pdb; + m->gdt = gdt; + m->perf.period = 1; + + /* + * For polled uart output at boot, need + * a default delay constant. 100000 should + * be enough for a while. Cpuidentify will + * calculate the real value later. + */ + m->loopconst = 100000; + m->cpumhz = 1000; // XXX! + + HYPERVISOR_shared_info = (shared_info_t*)mmumapframe(XENSHARED, (xenstart->shared_info)>>PGSHIFT); + + // XXX m->shared = &HYPERVISOR_shared_info->vcpu_data[m->machno]; +} + +void +init0(void) +{ + int i; + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. + */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + chandevinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", arch->id, conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "386", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + ksetenv("readparts", "1", 0); + for(i = 0; i < nconf; i++){ + if(confname[i][0] != '*') + ksetenv(confname[i], confval[i], 0); + ksetenv(confname[i], confval[i], 1); + } + poperror(); + } + + kproc("alarm", alarmkproc, 0); + touser(sp); +} + +void +userinit(void) +{ + Proc *p; + Segment *s; + KMap *k; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + p->fpstate = FPinit; + fpoff(); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * 4 bytes for gotolabel's return PC + */ + p->sched.pc = (ulong)init0; + p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD); + + /* + * User Stack + */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG); + p->seg[SSEG] = s; + pg = newpage(1, 0, USTKTOP-BY2PG); + segpage(s, pg); + k = kmap(pg); + bootargs(VA(k)); + kunmap(k); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, 1); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(1, 0, UTZERO); + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + segpage(s, pg); + k = kmap(s->map[0]->pages[0]); + memmove((ulong*)VA(k), initcode, sizeof initcode); + kunmap(k); + ready(p); +} + +uchar * +pusharg(char *p) +{ + int n; + + n = strlen(p)+1; + sp -= n; + memmove(sp, p, n); + return sp; +} + +void +bootargs(ulong base) +{ + int i, ac; + uchar *av[32]; + uchar **lsp; + + sp = (uchar*)base + BY2PG - Ustkheadroom; + + ac = 0; + av[ac++] = pusharg("/386/9dos"); + av[ac++] = pusharg("-D"); + + /* 4 byte word align stack */ + sp = (uchar*)((ulong)sp & ~3); + + /* build argc, argv on stack */ + sp -= (ac+1)*sizeof(sp); + lsp = (uchar**)sp; + for(i = 0; i < ac; i++) + *lsp++ = av[i] + ((USTKTOP - BY2PG) - base); + *lsp = 0; + sp += (USTKTOP - BY2PG) - base - sizeof(ulong); +} + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < nconf; i++) + if(cistrcmp(confname[i], name) == 0) + return confval[i]; + return 0; +} + +static void +writeconf(void) +{ + char *p, *q; + int n; + + p = getconfenv(); + + if(waserror()) { + free(p); + nexterror(); + } + + /* convert to name=value\n format */ + for(q=p; *q; q++) { + q += strlen(q); + *q = '='; + q += strlen(q); + *q = '\n'; + } + n = q - p + 1; + if(n >= BOOTARGSLEN) + error("kernel configuration too large"); + memmove(BOOTARGS, p, n); + poperror(); + free(p); +} + +void +confinit(void) +{ + char *p; + int i, userpcnt; + ulong kpages; + + for(i = 0; i < nconf; i++) + print("%s=%s\n", confname[i], confval[i]); + /* + * all ram above xentop is free, but must be mappable + * to virt addrs less than VIRT_START. + */ + kpages = PADDR(hypervisor_virt_start)>>PGSHIFT; + if(xenstart->nr_pages <= kpages) + kpages = xenstart->nr_pages; + else + print("Warning: Plan 9 / Xen limitation - " + "using only %lud of %lud available RAM pages\n", + kpages, xenstart->nr_pages); + xentop = PGROUND(PADDR(xentop)); + conf.mem[0].npage = kpages - (xentop>>PGSHIFT); + conf.mem[0].base = xentop; + + if(p = getconf("*kernelpercent")) + userpcnt = 100 - strtol(p, 0, 0); + else + userpcnt = 0; + + conf.npage = 0; + for(i=0; i<nelem(conf.mem); i++) + conf.npage += conf.mem[i].npage; + + conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5; + if(cpuserver) + conf.nproc *= 3; + if(conf.nproc > 2000) + conf.nproc = 2000; + conf.nimage = 200; + conf.nswap = conf.nproc*80; + conf.nswppo = 4096; + + if(cpuserver) { + if(userpcnt < 10) + userpcnt = 70; + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Hack for the big boys. Only good while physmem < 4GB. + * Give the kernel fixed max + enough to allocate the + * page pool. + * This is an overestimate as conf.upages < conf.npages. + * The patch of nimage is a band-aid, scanning the whole + * page list in imagereclaim just takes too long. + */ + if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){ + kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG; + conf.nimage = 2000; + kpages += (conf.nproc*KSTACK)/BY2PG; + } + } else { + if(userpcnt < 10) { + if(conf.npage*BY2PG < 16*MB) + userpcnt = 40; + else + userpcnt = 60; + } + kpages = conf.npage - (conf.npage*userpcnt)/100; + + /* + * Make sure terminals with low memory get at least + * 4MB on the first Image chunk allocation. + */ + if(conf.npage*BY2PG < 16*MB) + imagmem->minarena = 4*1024*1024; + } + + /* + * can't go past the end of virtual memory + * (ulong)-KZERO is 2^32 - KZERO + */ + if(kpages > ((ulong)-KZERO)/BY2PG) + kpages = ((ulong)-KZERO)/BY2PG; + + conf.upages = conf.npage - kpages; + conf.ialloc = (kpages/2)*BY2PG; + + /* + * Guess how much is taken by the large permanent + * datastructures. Mntcache and Mntrpc are not accounted for + * (probably ~300KB). + */ + kpages *= BY2PG; + kpages -= conf.upages*sizeof(Page) + + conf.nproc*sizeof(Proc) + + conf.nimage*sizeof(Image) + + conf.nswap + + conf.nswppo*sizeof(Page); + mainmem->maxsize = kpages; + if(!cpuserver){ + /* + * give terminals lots of image memory, too; the dynamic + * allocation will balance the load properly, hopefully. + * be careful with 32-bit overflow. + */ + imagmem->maxsize = kpages; + } +} + +static char* mathmsg[] = +{ + nil, /* handled below */ + "denormalized operand", + "division by zero", + "numeric overflow", + "numeric underflow", + "precision loss", +}; + +static void +mathnote(void) +{ + int i; + ulong status; + char *msg, note[ERRMAX]; + + status = up->fpsave.status; + + /* + * Some attention should probably be paid here to the + * exception masks and error summary. + */ + msg = "unknown exception"; + for(i = 1; i <= 5; i++){ + if(!((1<<i) & status)) + continue; + msg = mathmsg[i]; + break; + } + if(status & 0x01){ + if(status & 0x40){ + if(status & 0x200) + msg = "stack overflow"; + else + msg = "stack underflow"; + }else + msg = "invalid operation"; + } + snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux", + msg, up->fpsave.pc, status); + postnote(up, 1, note, NDebug); +} + +/* + * math coprocessor error + */ +static void +matherror(Ureg *ur, void*) +{ + /* + * a write cycle to port 0xF0 clears the interrupt latch attached + * to the error# line from the 387 + */ + if(!(m->cpuiddx & 0x01)) + outb(0xF0, 0xFF); + + /* + * save floating point state to check out error + */ + fpenv(&up->fpsave); + mathnote(); + + if(ur->pc & KZERO) + panic("fp: status %ux fppc=0x%lux pc=0x%lux", + up->fpsave.status, up->fpsave.pc, ur->pc); +} + +/* + * math coprocessor emulation fault + */ +static void +mathemu(Ureg *ureg, void*) +{ + if(up->fpstate & FPillegal){ + /* someone did floating point in a note handler */ + postnote(up, 1, "sys: floating point in note handler", NDebug); + return; + } + switch(up->fpstate){ + case FPinit: + fpinit(); + up->fpstate = FPactive; + break; + case FPinactive: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + * More attention should probably be paid here to the + * exception masks and error summary. + */ + if((up->fpsave.status & ~up->fpsave.control) & 0x07F){ + mathnote(); + break; + } + fprestore(&up->fpsave); + up->fpstate = FPactive; + break; + case FPactive: + panic("math emu pid %ld %s pc 0x%lux", + up->pid, up->text, ureg->pc); + break; + } +} + +/* + * math coprocessor segment overrun + */ +static void +mathover(Ureg*, void*) +{ + pexit("math overrun", 0); +} + +void +mathinit(void) +{ + trapenable(VectorCERR, matherror, 0, "matherror"); + //if(X86FAMILY(m->cpuidax) == 3) + // intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror"); + trapenable(VectorCNA, mathemu, 0, "mathemu"); + trapenable(VectorCSO, mathover, 0, "mathover"); +} + +/* + * set up floating point for a new process + */ +void +procsetup(Proc*p) +{ + p->fpstate = FPinit; + fpoff(); +} + +void +procfork(Proc *p) +{ + int s; + + p->kentry = up->kentry; + p->pcycles = -p->kentry; + + /* inherit user descriptors */ + memmove(p->gdt, up->gdt, sizeof(p->gdt)); + + /* copy local descriptor table */ + if(up->ldt != nil && up->nldt > 0){ + p->ldt = smalloc(sizeof(Segdesc) * up->nldt); + memmove(p->ldt, up->ldt, sizeof(Segdesc) * up->nldt); + p->nldt = up->nldt; + } + + /* save floating point state */ + s = splhi(); + switch(up->fpstate & ~FPillegal){ + case FPactive: + fpsave(&up->fpsave); + up->fpstate = FPinactive; + case FPinactive: + p->fpsave = up->fpsave; + p->fpstate = FPinactive; + } + splx(s); +} + +void +procrestore(Proc *p) +{ + uvlong t; + + if(p->kp) + return; + cycles(&t); + p->pcycles -= t; +} + +/* + * Save the mach dependent part of the process state. + */ +void +procsave(Proc *p) +{ + uvlong t; + + cycles(&t); + p->pcycles += t; + if(p->fpstate == FPactive){ + if(p->state == Moribund) + fpclear(); + else{ + /* + * Fpsave() stores without handling pending + * unmasked exeptions. Postnote() can't be called + * here as sleep() already has up->rlock, so + * the handling of pending exceptions is delayed + * until the process runs again and generates an + * emulation fault to activate the FPU. + */ + fpsave(&p->fpsave); + } + p->fpstate = FPinactive; + } + + /* + * While this processor is in the scheduler, the process could run + * on another processor and exit, returning the page tables to + * the free list where they could be reallocated and overwritten. + * When this processor eventually has to get an entry from the + * trashed page tables it will crash. + * + * If there's only one processor, this can't happen. + * You might think it would be a win not to do this in that case, + * especially on VMware, but it turns out not to matter. + */ + mmuflushtlb(0); +} + +static void +shutdown(int ispanic) +{ + int ms, once; + + lock(&active); + if(ispanic) + active.ispanic = ispanic; + else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0) + active.ispanic = 0; + once = active.machs & (1<<m->machno); + active.machs &= ~(1<<m->machno); + active.exiting = 1; + unlock(&active); + + if(once) + print("cpu%d: exiting\n", m->machno); + //spllo(); + for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){ + delay(TK2MS(2)); + if(active.machs == 0 && consactive() == 0) + break; + } + + if(getconf("*debug")) + delay(5*60*1000); + + if(active.ispanic){ + if(!cpuserver) + for(;;) + halt(); + delay(10000); + }else + delay(1000); +} + +void +reboot(void *entry, void *code, ulong size) +{ + void (*f)(ulong, ulong, ulong); + //ulong *pdb; + + writeconf(); + + shutdown(0); + + /* + * should be the only processor running now + */ + + print("shutting down...\n"); + delay(200); + + splhi(); + + /* turn off buffered serial console */ + serialoq = nil; + + /* shutdown devices */ + chandevshutdown(); + + /* reboot(0, ...) on Xen causes domU shutdown */ + if(entry == 0) + HYPERVISOR_shutdown(0); + + /* + * Modify the machine page table to directly map the low 4MB of memory + * This allows the reboot code to turn off the page mapping + */ + //pdb = m->pdb; + //pdb[PDX(0)] = pdb[PDX(KZERO)]; + mmuflushtlb(0); + + /* setup reboot trampoline function */ + f = (void*)REBOOTADDR; + memmove(f, rebootcode, sizeof(rebootcode)); + + print("rebooting...\n"); + + /* off we go - never to return */ + (*f)(PADDR(entry), PADDR(code), size); +} + + +void +exit(int ispanic) +{ + shutdown(ispanic); + arch->reset(); +} + +int +cistrcmp(char *a, char *b) +{ + int ac, bc; + + for(;;){ + ac = *a++; + bc = *b++; + + if(ac >= 'A' && ac <= 'Z') + ac = 'a' + (ac - 'A'); + if(bc >= 'A' && bc <= 'Z') + bc = 'a' + (bc - 'A'); + ac -= bc; + if(ac) + return ac; + if(bc == 0) + break; + } + return 0; +} + +int +cistrncmp(char *a, char *b, int n) +{ + unsigned ac, bc; + + while(n > 0){ + ac = *a++; + bc = *b++; + n--; + + if(ac >= 'A' && ac <= 'Z') + ac = 'a' + (ac - 'A'); + if(bc >= 'A' && bc <= 'Z') + bc = 'a' + (bc - 'A'); + + ac -= bc; + if(ac) + return ac; + if(bc == 0) + break; + } + + return 0; +} + diff --git a/sys/src/9/xen/mem.h b/sys/src/9/xen/mem.h new file mode 100644 index 000000000..d0b3cc356 --- /dev/null +++ b/sys/src/9/xen/mem.h @@ -0,0 +1,160 @@ +/* + * Memory and machine-specific definitions. Used in C and assembler. + */ + +#define MIN(a, b) ((a) < (b)? (a): (b)) +#define MAX(a, b) ((a) > (b)? (a): (b)) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BI2WD 32 /* bits per word */ +#define BY2WD 4 /* bytes per word */ +#define BY2V 8 /* bytes per double word */ +#define BY2PG 4096 /* bytes per page */ +#define WD2PG (BY2PG/BY2WD) /* words per page */ +#define PGSHIFT 12 /* log(BY2PG) */ +#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1)) +#define PGROUND(s) ROUND(s, BY2PG) +#define BLOCKALIGN 8 +#define FPalign 16 /* required for FXSAVE */ + +#define MAXMACH 8 /* max # cpus system can run */ +#define MAX_VIRT_CPUS MAXMACH +#define KSTACK 4096 /* Size of kernel stack */ + +/* + * Time + */ +#define HZ (100) /* clock frequency */ +#define MS2HZ (1000/HZ) /* millisec per clock tick */ +#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */ + +/* + * Fundamental addresses + */ +#define REBOOTADDR 0x00001000 /* reboot code - physical address */ +#define APBOOTSTRAP 0x80001000 /* AP bootstrap code */ +#define MACHADDR 0x80002000 /* as seen by current processor */ +#define CPU0MACH MACHADDR /* Mach for bootstrap processor */ +#define XENCONSOLE 0x80003000 /* xen console ring */ +#define XENSHARED 0x80004000 /* xen shared page */ +#define XENBUS 0x80005000 /* xenbus aka xenstore ring */ +#define XENGRANTTAB 0x80006000 /* grant table */ + +#define MACHSIZE BY2PG + +/* + * Address spaces + * + * User is at 0-2GB + * Kernel is at 2GB-4GB + */ +#define UZERO 0 /* base of user address space */ +#define UTZERO (UZERO+BY2PG) /* first address in user text */ +#define UTROUND(t) ROUNDUP((t), BY2PG) +#define KZERO 0x80000000 /* base of kernel address space */ +#define KTZERO 0x80010000 /* first address in kernel text */ +#define USTKTOP (KZERO-BY2PG) /* byte just beyond user stack */ +#define USTKSIZE (16*1024*1024) /* size of user stack */ +#define TSTKTOP (USTKTOP-USTKSIZE) /* end of new stack in sysexec */ +#define TSTKSIZ 100 + +/* + * known x86 segments (in GDT) and their selectors + */ +#define NULLSEG 0 /* null segment */ +#define KDSEG 1 /* kernel data/stack */ +#define KESEG 2 /* kernel executable */ +#define UDSEG 3 /* user data/stack */ +#define UESEG 4 /* user executable */ +#define TSSSEG 5 /* task segment */ +#define APMCSEG 6 /* APM code segment */ +#define APMCSEG16 7 /* APM 16-bit code segment */ +#define APMDSEG 8 /* APM data segment */ +#define PROCSEG0 11 /* per process descriptor0 */ +#define NPROCSEG 3 /* number of per process descriptors */ +#define NGDT 13 /* number of GDT entries required */ +/* #define APM40SEG 8 /* APM segment 0x40 */ + +#define SELGDT (0<<2) /* selector is in gdt */ +#define SELLDT (1<<2) /* selector is in ldt */ + +#define SELECTOR(i, t, p) (((i)<<3) | (t) | (p)) + +#define NULLSEL SELECTOR(NULLSEG, SELGDT, 0) +/* these are replaced by XEN entries */ +#ifdef NOPE // XXX investigate more +#define KDSEL SELECTOR(KDSEG, SELGDT, 0) +#define KESEL SELECTOR(KESEG, SELGDT, 0) +#define UESEL SELECTOR(UESEG, SELGDT, 3) +#define UDSEL SELECTOR(UDSEG, SELGDT, 3) +/* comment out to make sure unused ... */ + +#define TSSSEL SELECTOR(TSSSEG, SELGDT, 0) +#define APMCSEL SELECTOR(APMCSEG, SELGDT, 0) +#define APMCSEL16 SELECTOR(APMCSEG16, SELGDT, 0) +#define APMDSEL SELECTOR(APMDSEG, SELGDT, 0) +/* #define APM40SEL SELECTOR(APM40SEG, SELGDT, 0) */ +#else +/* use the selectors that xen gives us */ +#define KESEL FLAT_KERNEL_CS +#define KDSEL FLAT_KERNEL_DS +#define UESEL FLAT_USER_CS +#define UDSEL FLAT_USER_DS +#endif + +/* + * fields in segment descriptors + */ +#define SEGDATA (0x10<<8) /* data/stack segment */ +#define SEGEXEC (0x18<<8) /* executable segment */ +#define SEGTSS (0x9<<8) /* TSS segment */ +#define SEGCG (0x0C<<8) /* call gate */ +#define SEGIG (0x0E<<8) /* interrupt gate */ +#define SEGTG (0x0F<<8) /* trap gate */ +#define SEGTYPE (0x1F<<8) + +#define SEGP (1<<15) /* segment present */ +#define SEGPL(x) ((x)<<13) /* priority level */ +#define SEGB (1<<22) /* granularity 1==4k (for expand-down) */ +#define SEGG (1<<23) /* granularity 1==4k (for other) */ +#define SEGE (1<<10) /* expand down */ +#define SEGW (1<<9) /* writable (for data/stack) */ +#define SEGR (1<<9) /* readable (for code) */ +#define SEGD (1<<22) /* default 1==32bit (for code) */ + +/* + * virtual MMU + */ +#define PTEMAPMEM (1024*1024) +#define PTEPERTAB (PTEMAPMEM/BY2PG) +#define SEGMAPSIZE 1984 +#define SSEGMAPSIZE 16 +#define PPN(x) ((x)&~(BY2PG-1)) +#define PGOFF(x) ((x)&(BY2PG-1)) + +/* + * physical MMU + */ +#define PTEVALID (1<<0) +#define PTEWT (1<<3) +#define PTEUNCACHED (1<<4) +#define PTEWRITE (1<<1) +#define PTERONLY (0<<1) +#define PTEKERNEL (0<<2) +#define PTEUSER (1<<2) +#define PTESIZE (1<<7) +#define PTEGLOBAL (1<<8) + +/* + * Macros for calculating offsets within the page directory base + * and page tables. + */ +#define PAX(va) (paemode? ((ulong)(va)>>29) & 0x6 : 0) +#define PDX(va) (paemode? (((ulong)(va))>>20) & 0x03FE : (((ulong)(va))>>22) & 0x03FF) +#define PTX(va) (paemode? (((ulong)(va))>>11) & 0x03FE : (((ulong)(va))>>12) & 0x03FF) +#define PDB(pdb,va) (paemode? KADDR(MAPPN((pdb)[((ulong)(va)>>29) & 0x6])) : pdb) + +#define getpgcolor(a) 0 diff --git a/sys/src/9/xen/mkfile b/sys/src/9/xen/mkfile new file mode 100644 index 000000000..560efa422 --- /dev/null +++ b/sys/src/9/xen/mkfile @@ -0,0 +1,188 @@ +CONF=xenpcf +CONFLIST=xenpcf + +objtype=386 +</$objtype/mkfile +p=9 + +KTZERO=0x80010000 +KZERO=0x80000000 +KPZERO=0x10000 +PAE=yes + +DEVS=`{rc ../port/mkdevlist $CONF} + +PORT=\ + alarm.$O\ + alloc.$O\ + allocb.$O\ + auth.$O\ + cache.$O\ + chan.$O\ + dev.$O\ + edf.$O\ + fault.$O\ + page.$O\ + parse.$O\ + pgrp.$O\ + portclock.$O\ + print.$O\ + proc.$O\ + qio.$O\ + qlock.$O\ + rebootcmd.$O\ + segment.$O\ + swap.$O\ + sysfile.$O\ + sysproc.$O\ + taslock.$O\ + tod.$O\ + xalloc.$O\ + +XEN=\ + xengrant.$O\ + xentimer.$O\ + xensystem.$O\ + +SCHED=`{ls -p xen-public/sched*.h >[2]/dev/null} +ARCH=`{test -d xen-public/arch-x86 && echo arch-x86/xen-x86_32.h arch-x86/xen.h || echo arch-x86_32.h } + +XENHEADERS=\ + $ARCH\ + xen.h\ + event_channel.h\ + grant_table.h\ + memory.h\ + physdev.h\ + $SCHED\ + io/ring.h\ + io/blkif.h\ + io/console.h\ + io/netif.h\ + io/xenbus.h\ + io/xs_wire.h\ + +OBJ=\ + l.$O\ + plan9l.$O\ + xen.$O\ + main.$O\ + mmu.$O\ + random.$O\ + rdb.$O\ + trap.$O\ + $CONF.root.$O\ + $CONF.rootc.$O\ + $DEVS\ + $PORT\ + $XEN\ + +LIB=\ + /$objtype/lib/libmemlayer.a\ + /$objtype/lib/libmemdraw.a\ + /$objtype/lib/libdraw.a\ + /$objtype/lib/libip.a\ + /$objtype/lib/libsec.a\ + /$objtype/lib/libmp.a\ + /$objtype/lib/libc.a\ + +ETHER=`{echo devether.c ether*.c | sed 's/\.c/.'$O'/g'} +VGA=`{echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'} +SDEV=`{echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'} + +PAE=`{echo $PAE | sed 's/yes/yes[extended-cr3]/'} +XENELF='LOADER=generic,XEN_VER=xen-3.0,ELF_PADDR_OFFSET=0,VIRT_BASE='$KZERO',VIRT_ENTRY='$KTZERO',PAE='$PAE + +#$p$CONF: $CONF.c $OBJ $LIB xenbin +# $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c +# $LD -o $target.tmp -T$KTZERO -l $OBJ $CONF.$O $LIB +# ./xenbin <$target.tmp >$target +# rm $target.tmp +# size $target + +$p$CONF: $CONF.c $OBJ $LIB xenelf + $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c + $LD -o $target.elf -H5 -T$KTZERO -P$KPZERO -l $OBJ $CONF.$O $LIB + ./xenelf $target.elf $target __xen_guest ''$XENELF'' + size $target + +$p$CONF.gz: $p$CONF + #strip -o /fd/1 $p$CONF | gzip -9 > $p$CONF.gz + gzip -9 $p$CONF > $p$CONF.gz + +install:V: $p$CONF $p$CONF.gz + cp $p$CONF $p$CONF.gz /$objtype/ + # import lookout / /n/lookout && cp $p$CONF $p$CONF.gz /n/lookout/$objtype/ + +# copies generated by the rule below +PCHEADERS=uncached.h etherif.h ethermii.h mp.h io.h + +REPCH=`{echo $PCHEADERS | sed 's/\.h//g; s/ /|/g'} +^($REPCH)\.h:R: '../pc/\1.h' + cp $prereq . + +REPCC=`{../port/mkfilelist ../pc} +^($REPCC)\.$O:R: '../pc/\1.c' + $CC $CFLAGS -I. -. ../pc/$stem1.c + +<../boot/bootmkfile +<../port/portmkfile +<|../port/mkbootrules $CONF + +ptclbsum386.$O: ../pc/ptclbsum386.s + $AS $AFLAGS ../pc/ptclbsum386.s + +# we inherited these.. revisit. +$ETHER: etherif.h ../port/netif.h +$SDEV: ../port/sd.h +main.$O: init.h reboot.h +trap.$O: /sys/include/tos.h + +%.$O: /$objtype/include/u.h ../port/lib.h mem.h dat.h fns.h io.h ../port/error.h ../port/portdat.h ../port/portfns.h xendat.h xendefs.h + +xendefs.h: xendat.h + grep '^#define[ ]+FLAT_' xendat.h >$target + +xendat.h: + { echo '#define __i386__ __i386__'; \ + echo '#define __XEN_INTERFACE_VERSION__ 0x00030201'; \ + echo '#define XEN_GUEST_HANDLE_00030205(type) type *'; \ + cat xen-public/^($XENHEADERS) } | \ + ./cppx > $target + +init.h: ../port/initcode.c ../pc/init9.c + $CC ../port/initcode.c + $CC ../pc/init9.c + $LD -l -R1 -o init.out init9.$O initcode.$O /386/lib/libc.a + {echo 'uchar initcode[]={' + strip -o /fd/1 init.out | xd -1x | + sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > init.h + +reboot.h: ../pc/rebootcode.s + $AS ../pc/rebootcode.s + $LD -l -s -T0x1000 -R4 -o reboot.out rebootcode.$O + {echo 'uchar rebootcode[]={' + xd -1x reboot.out | + sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > reboot.h + +acid:V: + $CC -a -w main.c>acid + +dpart: dpart.$O + $LD -o dpart dpart.$O + +xenstore: xenstore.$O + $LD -o xenstore xenstore.$O + +# XXX this is wrong if we're cross-compiling +xenbin: xenbin.$O + $LD -o xenbin xenbin.$O +xenelf: xenelf.$O + $LD -o xenelf xenelf.$O + +%.clean:V: + rm -f $stem.c [9bz]$stem [9bz]$stem.gz 9$stem.elf boot$stem.* reboot.h init.h xendat.h xendefs.h $PCHEADERS dpart xenbin xenelf xenstore + + diff --git a/sys/src/9/xen/mmu.c b/sys/src/9/xen/mmu.c new file mode 100644 index 000000000..b3ce84074 --- /dev/null +++ b/sys/src/9/xen/mmu.c @@ -0,0 +1,595 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +int paemode; +uvlong *xenpdpt; /* this needs to go in Mach for multiprocessor guest */ + +#define LOG(a) +#define PUTMMULOG(a) +#define MFN(pa) (patomfn[(pa)>>PGSHIFT]) +#define MAPPN(x) (paemode? matopfn[*(uvlong*)(&x)>>PGSHIFT]<<PGSHIFT : matopfn[(x)>>PGSHIFT]<<PGSHIFT) + +#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } +#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } +#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\ + ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP } + +Segdesc gdt[NGDT] = +{ +[NULLSEG] { 0, 0}, /* null descriptor */ +[KDSEG] DATASEGM(0), /* kernel data/stack */ +[KESEG] EXECSEGM(0), /* kernel code */ +[UDSEG] DATASEGM(3), /* user data/stack */ +[UESEG] EXECSEGM(3), /* user code */ +[TSSSEG] TSSSEGM(0,0), /* tss segment */ +}; + +/* note: pdb must already be pinned */ +static void +taskswitch(Page *pdb, ulong stack) +{ + Tss *tss; + + tss = m->tss; + tss->ss0 = KDSEL; + tss->esp0 = stack; + tss->ss1 = KDSEL; + tss->esp1 = stack; + tss->ss2 = KDSEL; + tss->esp2 = stack; + //tss->cr3 = pdb; + HYPERVISOR_stack_switch(KDSEL, stack); + mmuflushtlb(pdb); +} + +void +mmuflushtlb(Page *pdb) +{ + int s, i; + + if(!paemode){ + if(pdb) + xenptswitch(pdb->pa); + else + xenptswitch(PADDR(m->pdb)); + }else{ + if(pdb){ + s = splhi(); + for(i = 0; i < 3; i++){ + xenupdate((ulong*)&xenpdpt[i], pdb->pa | PTEVALID); + pdb = pdb->next; + } + splx(s); + }else{ + s = splhi(); + for(i = 0; i < 3; i++) + xenupdatema((ulong*)&xenpdpt[i], ((uvlong*)m->pdb)[i]); + splx(s); + } + xentlbflush(); + } +} + +/* + * On processors that support it, we set the PTEGLOBAL bit in + * page table and page directory entries that map kernel memory. + * Doing this tells the processor not to bother flushing them + * from the TLB when doing the TLB flush associated with a + * context switch (write to CR3). Since kernel memory mappings + * are never removed, this is safe. (If we ever remove kernel memory + * mappings, we can do a full flush by turning off the PGE bit in CR4, + * writing to CR3, and then turning the PGE bit back on.) + * + * See also mmukmap below. + * + * Processor support for the PTEGLOBAL bit is enabled in devarch.c. + */ +static void +memglobal(void) +{ + int i, j; + ulong *pde, *pte; + + /* only need to do this once, on bootstrap processor */ + if(m->machno != 0) + return; + + if(!m->havepge) + return; + + pde = m->pdb; + for(i=512; i<1024; i++){ /* 512: start at entry for virtual 0x80000000 */ + if(pde[i] & PTEVALID){ + pde[i] |= PTEGLOBAL; + if(!(pde[i] & PTESIZE)){ + pte = KADDR(pde[i]&~(BY2PG-1)); + for(j=0; j<1024; j++) + if(pte[j] & PTEVALID) + pte[j] |= PTEGLOBAL; + } + } + } +} + +ulong +mmumapframe(ulong va, ulong mfn) +{ + ulong *pte, pdbx; + uvlong ma; + + /* + * map machine frame number to a virtual address. + * When called the pagedir and page table exist, we just + * need to fill in a page table entry. + */ + ma = ((uvlong)mfn<<PGSHIFT) | PTEVALID|PTEWRITE; + pdbx = PDX(va); + pte = KADDR(MAPPN(PDB(m->pdb,va)[pdbx])); + xenupdatema(&pte[PTX(va)], ma); + return va; +} + +void +mmumapcpu0(void) +{ + ulong *pdb, *pte, va, pa, pdbx; + + if(strstr(xenstart->magic, "x86_32p")) + paemode = 1; + hypervisor_virt_start = paemode ? 0xF5800000 : 0xFC000000; + patomfn = (ulong*)xenstart->mfn_list; + matopfn = (ulong*)hypervisor_virt_start; + /* Xen bug ? can't touch top entry in PDPT */ + if(paemode) + hypervisor_virt_start = 0xC0000000; + + /* + * map CPU0MACH at MACHADDR. + * When called the pagedir and page table exist, we just + * need to fill in a page table entry. + */ + pdb = (ulong*)xenstart->pt_base; + va = MACHADDR; + pa = PADDR(CPU0MACH) | PTEVALID|PTEWRITE; + pdbx = PDX(va); + pdb = PDB(pdb, va); + pte = KADDR(MAPPN(pdb[pdbx])); + xenupdate(&pte[PTX(va)], pa); +} + +void +mmuinit(void) +{ +//XXX ulong x; +//XXX ushort ptr[3]; + ulong *pte, npgs, pa; + extern int rtsr(void); + + if(paemode){ + int i; + xenpdpt = (uvlong*)m->pdb; + m->pdb = xspanalloc(32, 32, 0); + /* clear "reserved" bits in initial page directory pointers -- Xen bug? */ + for(i = 0; i < 4; i++) + ((uvlong*)m->pdb)[i] = xenpdpt[i] & ~0x1E6LL; + } + + /* + * So far only memory up to xentop is mapped, map the rest. + * We cant use large pages because our contiguous PA space + * is not necessarily contiguous in MA. + */ + npgs = conf.mem[0].npage; + for(pa=conf.mem[0].base; npgs; npgs--, pa+=BY2PG) { + pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 1); + if(!pte) + panic("mmuinit"); + xenupdate(pte, pa|PTEVALID|PTEWRITE); + } + + memglobal(); + + m->tss = malloc(sizeof(Tss)); + memset(m->tss, 0, sizeof(Tss)); + m->tss->iomap = 0xDFFF<<16; + + /* + * We used to keep the GDT in the Mach structure, but it + * turns out that that slows down access to the rest of the + * page. Since the Mach structure is accessed quite often, + * it pays off anywhere from a factor of 1.25 to 2 on real + * hardware to separate them (the AMDs are more sensitive + * than Intels in this regard). Under VMware it pays off + * a factor of about 10 to 100. + */ + +#ifdef we_dont_set_gdt_or_lidt + memmove(m->gdt, gdt, sizeof gdt); + x = (ulong)m->tss; + m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); + m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; + + ptr[0] = sizeof(gdt)-1; + x = (ulong)m->gdt; + ptr[1] = x & 0xFFFF; + ptr[2] = (x>>16) & 0xFFFF; + lgdt(ptr); + + ptr[0] = sizeof(Segdesc)*256-1; + x = IDTADDR; + ptr[1] = x & 0xFFFF; + ptr[2] = (x>>16) & 0xFFFF; + lidt(ptr); +#endif + +#ifdef we_may_eventually_want_this + /* make kernel text unwritable */ + for(x = KTZERO; x < (ulong)etext; x += BY2PG){ + p = mmuwalk(m->pdb, x, 2, 0); + if(p == nil) + panic("mmuinit"); + *p &= ~PTEWRITE; + } +#endif + + taskswitch(0, (ulong)m + BY2PG); +#ifdef we_dont_do_this + ltr(TSSSEL); +#endif +} + +void +flushmmu(void) +{ + int s; + + s = splhi(); + up->newtlb = 1; + mmuswitch(up); + splx(s); +} + +static ulong* +mmupdb(Page *pg, ulong va) +{ + int i; + + for(i = PAX(va); i > 0; i -= 2) + pg = pg->next; + return (ulong*)pg->va; +} + +/* this can be called with an active pdb, so use Xen calls to zero it out. + */ +static void +mmuptefree(Proc* proc) +{ + ulong *pdb, va; + Page **last, *page; + + if(proc->mmupdb && proc->mmuused){ + last = &proc->mmuused; + for(page = *last; page; page = page->next){ + /* this is no longer a pte page so make it readwrite */ + va = page->daddr; + pdb = mmupdb(proc->mmupdb, va); + xenupdatema(&pdb[PDX(va)], 0); + xenptunpin(page->va); + last = &page->next; + } + *last = proc->mmufree; + proc->mmufree = proc->mmuused; + proc->mmuused = 0; + } +} + +void +mmuswitch(Proc* proc) +{ + //ulong *pdb; + + if(proc->newtlb){ + mmuptefree(proc); + proc->newtlb = 0; + } + + if(proc->mmupdb){ + //XXX doesn't work for some reason, but it's not needed for uniprocessor + //pdb = (ulong*)proc->mmupdb->va; + //xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]); + taskswitch(proc->mmupdb, (ulong)(proc->kstack+KSTACK)); + } + else + taskswitch(0, (ulong)(proc->kstack+KSTACK)); +} + +void +mmurelease(Proc* proc) +{ + Page *page, *next; + + /* + * Release any pages allocated for a page directory base or page-tables + * for this process: + * switch to the prototype pdb for this processor (m->pdb); + * call mmuptefree() to place all pages used for page-tables (proc->mmuused) + * onto the process' free list (proc->mmufree). This has the side-effect of + * cleaning any user entries in the pdb (proc->mmupdb); + * if there's a pdb put it in the cache of pre-initialised pdb's + * for this processor (m->pdbpool) or on the process' free list; + * finally, place any pages freed back into the free pool (palloc). + * This routine is only called from sched() with palloc locked. + */ + taskswitch(0, (ulong)m + BY2PG); + mmuptefree(proc); + + if((page = proc->mmupdb) != 0){ + proc->mmupdb = 0; + while(page){ + next = page->next; + /* its not a page table anymore, mark it rw */ + xenptunpin(page->va); + if(paemode || m->pdbcnt > 10){ + page->next = proc->mmufree; + proc->mmufree = page; + } + else{ + page->next = m->pdbpool; + m->pdbpool = page; + m->pdbcnt++; + } + page = next; + } + } + + for(page = proc->mmufree; page; page = next){ + next = page->next; + if(--page->ref) + panic("mmurelease: page->ref %d\n", page->ref); + pagechainhead(page); + } + if(proc->mmufree && palloc.r.p) + wakeup(&palloc.r); + proc->mmufree = 0; +} + +static Page* +mmupdballoc(ulong va, void *mpdb) +{ + int s; + Page *page; + Page *badpages, *pg; + + s = splhi(); + /* + * All page tables must be read-only. We will mark them + * readwrite later when we free them and they are no + * longer used as page tables. + */ + if(m->pdbpool == 0){ + spllo(); + badpages = 0; + for (;;) { + page = newpage(0, 0, 0); + page->va = VA(kmap(page)); + if(mpdb) + memmove((void*)page->va, mpdb, BY2PG); + else + memset((void*)page->va, 0, BY2PG); + if (xenpgdpin(page->va)) + break; + /* + * XXX Plan 9 is a bit lax about putting pages on the free list when they are + * still mapped (r/w) by some process's page table. From Plan 9's point + * of view this is safe because the any such process will have up->newtlb set, + * so the mapping will be cleared before the process is dispatched. But the Xen + * hypervisor has no way of knowing this, so it refuses to pin the page for use + * as a pagetable. + */ + if(0) print("bad pgdpin %lux va %lux copy %lux %s\n", MFN(PADDR(page->va)), va, (ulong)mpdb, up? up->text: ""); + page->next = badpages; + badpages = page; + } + while (badpages != 0) { + pg = badpages; + badpages = badpages->next; + putpage(pg); + } + } + else{ + page = m->pdbpool; + m->pdbpool = page->next; + m->pdbcnt--; + if (!xenpgdpin(page->va)) + panic("xenpgdpin"); + } + splx(s); + + page->next = 0; + return page; +} + +void +checkmmu(ulong va, ulong pa) +{ + ulong *pdb, *pte; + int pdbx; + + if(up->mmupdb == 0) + return; + + pdb = mmupdb(up->mmupdb, va); + pdbx = PDX(va); + if(MAPPN(pdb[pdbx]) == 0){ + /* okay to be empty - will fault and get filled */ + return; + } + + pte = KADDR(MAPPN(pdb[pdbx])); + if(MAPPN(pte[PTX(va)]) != pa){ + if(!paemode) + print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux (0x%08lux)\n", + up->pid, up->text, + va, pa, pte[PTX(va)], MAPPN(pte[PTX(va)])); + else + print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%16llux (0x%08lux)\n", + up->pid, up->text, + va, pa, *(uvlong*)&pte[PTX(va)], MAPPN(pte[PTX(va)])); + } +} + +void +putmmu(ulong va, ulong pa, Page*) +{ + int pdbx; + Page *page; + Page *badpages, *pg; + ulong *pdb, *pte; + int i, s; + + PUTMMULOG(dprint("putmmu va %lux pa %lux\n", va, pa);) + if(up->mmupdb == 0){ + if(!paemode) + up->mmupdb = mmupdballoc(va, m->pdb); + else { + page = 0; + for(i = 4; i >= 0; i -= 2){ + if(m->pdb[i]) + pg = mmupdballoc(va, KADDR(MAPPN(m->pdb[i]))); + else + pg = mmupdballoc(va, 0); + pg->next = page; + page = pg; + } + up->mmupdb = page; + } + } + pdb = mmupdb(up->mmupdb, va); + pdbx = PDX(va); + + if(PPN(pdb[pdbx]) == 0){ + PUTMMULOG(dprint("new pt page for index %d pdb %lux\n", pdbx, (ulong)pdb);) + /* mark page as readonly before using as a page table */ + if(up->mmufree == 0){ + badpages = 0; + for (;;) { + page = newpage(1, 0, 0); + page->va = VA(kmap(page)); + if (xenptpin(page->va)) + break; + if(0) print("bad pin %lux va %lux %s\n", MFN(PADDR(page->va)), va, up->text); + page->next = badpages; + badpages = page; + } + while (badpages != 0) { + pg = badpages; + badpages = badpages->next; + putpage(pg); + } + } + else { + page = up->mmufree; + up->mmufree = page->next; + memset((void*)page->va, 0, BY2PG); + if (!xenptpin(page->va)) + panic("xenptpin"); + } + + xenupdate(&pdb[pdbx], page->pa|PTEVALID|PTEUSER|PTEWRITE); + + page->daddr = va; + page->next = up->mmuused; + up->mmuused = page; + } + + pte = KADDR(MAPPN(pdb[pdbx])); + PUTMMULOG(dprint("pte %lux index %lud old %lux new %lux mfn %lux\n", (ulong)pte, PTX(va), pte[PTX(va)], pa|PTEUSER, MFN(pa));) + xenupdate(&pte[PTX(va)], pa|PTEUSER); + + s = splhi(); + //XXX doesn't work for some reason, but it's not needed for uniprocessor + //xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]); + mmuflushtlb(up->mmupdb); + splx(s); +} + +ulong* +mmuwalk(ulong* pdb, ulong va, int level, int create) +{ + ulong pa, va2, *table; + + /* + * Walk the page-table pointed to by pdb and return a pointer + * to the entry for virtual address va at the requested level. + * If the entry is invalid and create isn't requested then bail + * out early. Otherwise, for the 2nd level walk, allocate a new + * page-table page and register it in the 1st level. + */ + if(paemode){ + pdb = &pdb[PAX(va)]; + if(!(*pdb & PTEVALID)){ + if(create == 0) + return 0; + panic("mmuwalk: missing pgdir ptr for va=%lux\n", va); + } + pdb = KADDR(MAPPN(*pdb)); + } + table = &pdb[PDX(va)]; + if(!(*table & PTEVALID) && create == 0) + return 0; + + switch(level){ + + default: + return 0; + + case 1: + return table; + + case 2: + if(*table & PTESIZE) + panic("mmuwalk2: va %luX entry %luX\n", va, *table); + if(!(*table & PTEVALID)){ + va2 = (ulong)xspanalloc(BY2PG, BY2PG, 0); + pa = PADDR(va2); + xenptpin(va2); + xenupdate(table, pa|PTEWRITE|PTEVALID); + } + table = KADDR(MAPPN(*table)); + + return &table[PTX(va)]; + } +} + +int +mmukmapsync(ulong va) +{ + USED(va); + return 0; +} + +/* + * More debugging. + */ +void +countpagerefs(ulong *ref, int print) +{ + USED(ref); + USED(print); +} + +/* + * Return the number of bytes that can be accessed via KADDR(pa). + * If pa is not a valid argument to KADDR, return 0. + */ +ulong +cankaddr(ulong pa) +{ + if(pa >= -KZERO) + return 0; + return -KZERO - pa; +} diff --git a/sys/src/9/xen/plan9l.s b/sys/src/9/xen/plan9l.s new file mode 100644 index 000000000..e5e2f541c --- /dev/null +++ b/sys/src/9/xen/plan9l.s @@ -0,0 +1,53 @@ +#include "xendefs.h" +#include "mem.h" + +/* + * This must match io.h. + */ +#define VectorSYSCALL 0x40 + +/* + * Used to get to the first process: + * set up an interrupt return frame and IRET to user level. + */ +TEXT touser(SB), $0 + PUSHL $(UDSEL) /* old ss */ + MOVL sp+0(FP), AX /* old sp */ + PUSHL AX + MOVL $0x200, AX /* interrupt enable flag */ + PUSHL AX /* old flags */ + PUSHL $(UESEL) /* old cs */ + PUSHL $(UTZERO+32) /* old pc */ + MOVL $(UDSEL), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, GS + MOVW AX, FS + IRETL + +/* + * This is merely _strayintr from l.s optimised to vector + * to syscall() without going through trap(). + */ +TEXT _syscallintr(SB), $0 + PUSHL $VectorSYSCALL /* trap type */ + + PUSHL DS + PUSHL ES + PUSHL FS + PUSHL GS + PUSHAL + MOVL $(KDSEL), AX + MOVW AX, DS + MOVW AX, ES + PUSHL SP + CALL syscall(SB) + + POPL AX + POPAL + POPL GS + POPL FS + POPL ES + POPL DS + ADDL $8, SP /* pop error code and trap type */ + IRETL diff --git a/sys/src/9/xen/screen.h b/sys/src/9/xen/screen.h new file mode 100644 index 000000000..bbe6b2512 --- /dev/null +++ b/sys/src/9/xen/screen.h @@ -0,0 +1,186 @@ +typedef struct Cursor Cursor; +typedef struct Cursorinfo Cursorinfo; +struct Cursorinfo { + Cursor; + Lock; +}; + +/* devmouse.c */ +extern void mousetrack(int, int, int, int); +extern void absmousetrack(int, int, int, int); +extern Point mousexy(void); + +extern void mouseaccelerate(int); +extern int m3mouseputc(Queue*, int); +extern int m5mouseputc(Queue*, int); +extern int mouseputc(Queue*, int); + +extern Cursorinfo cursor; +extern Cursor arrow; + +/* + * Generic VGA registers. + */ +enum { + MiscW = 0x03C2, /* Miscellaneous Output (W) */ + MiscR = 0x03CC, /* Miscellaneous Output (R) */ + Status0 = 0x03C2, /* Input status 0 (R) */ + Status1 = 0x03DA, /* Input Status 1 (R) */ + FeatureR = 0x03CA, /* Feature Control (R) */ + FeatureW = 0x03DA, /* Feature Control (W) */ + + Seqx = 0x03C4, /* Sequencer Index, Data at Seqx+1 */ + Crtx = 0x03D4, /* CRT Controller Index, Data at Crtx+1 */ + Grx = 0x03CE, /* Graphics Controller Index, Data at Grx+1 */ + Attrx = 0x03C0, /* Attribute Controller Index and Data */ + + PaddrW = 0x03C8, /* Palette Address Register, write */ + Pdata = 0x03C9, /* Palette Data Register */ + Pixmask = 0x03C6, /* Pixel Mask Register */ + PaddrR = 0x03C7, /* Palette Address Register, read */ + Pstatus = 0x03C7, /* DAC Status (RO) */ + + Pcolours = 256, /* Palette */ + Pred = 0, + Pgreen = 1, + Pblue = 2, + + Pblack = 0x00, + Pwhite = 0xFF, +}; + +#define VGAMEM() 0xA0000 +#define vgai(port) inb(port) +#define vgao(port, data) outb(port, data) + +extern int vgaxi(long, uchar); +extern int vgaxo(long, uchar, uchar); + +/* + */ +typedef struct VGAdev VGAdev; +typedef struct VGAcur VGAcur; +typedef struct VGAscr VGAscr; + +struct VGAdev { + char* name; + + void (*enable)(VGAscr*); + void (*disable)(VGAscr*); + void (*page)(VGAscr*, int); + void (*linear)(VGAscr*, int, int); + void (*drawinit)(VGAscr*); + int (*fill)(VGAscr*, Rectangle, ulong); + void (*ovlctl)(VGAscr*, Chan*, void*, int); + int (*ovlwrite)(VGAscr*, void*, int, vlong); + void (*flush)(VGAscr*, Rectangle); +}; + +struct VGAcur { + char* name; + + void (*enable)(VGAscr*); + void (*disable)(VGAscr*); + void (*load)(VGAscr*, Cursor*); + int (*move)(VGAscr*, Point); + + int doespanning; +}; + +/* + */ +struct VGAscr { + Lock devlock; + VGAdev* dev; + Pcidev* pci; + + VGAcur* cur; + ulong storage; + Cursor; + + int useflush; + + ulong paddr; /* frame buffer */ + void* vaddr; + int apsize; + + ulong io; /* device specific registers */ + ulong *mmio; + + ulong colormap[Pcolours][3]; + int palettedepth; + + Memimage* gscreen; + Memdata* gscreendata; + Memsubfont* memdefont; + + int (*fill)(VGAscr*, Rectangle, ulong); + int (*scroll)(VGAscr*, Rectangle, Rectangle); + void (*blank)(VGAscr*, int); + ulong id; /* internal identifier for driver use */ + int isblank; + int overlayinit; + int softscreen; +}; + +extern VGAscr vgascreen[]; + +enum { + Backgnd = 0, /* black */ +}; + +/* mouse.c */ +extern void mousectl(Cmdbuf*); +extern void mouseresize(void); +extern void mouseredraw(void); + +/* screen.c */ +extern int hwaccel; /* use hw acceleration; default on */ +extern int hwblank; /* use hw blanking; default on */ +extern int panning; /* use virtual screen panning; default off */ +extern void addvgaseg(char*, ulong, ulong); +extern uchar* attachscreen(Rectangle*, ulong*, int*, int*, int*); +extern void flushmemscreen(Rectangle); +extern void cursoron(void); +extern void cursoroff(void); +extern void setcursor(Cursor*); +extern int screensize(int, int, int, ulong); +extern int screenaperture(int, int); +extern Rectangle physgscreenr; /* actual monitor size */ +extern void blankscreen(int); + +extern VGAcur swcursor; +extern void swcursorinit(void); +extern void swcursorhide(void); +extern void swcursoravoid(Rectangle); +extern void swcursorunhide(void); + +/* devdraw.c */ +extern void deletescreenimage(void); +extern void resetscreenimage(void); +extern int drawhasclients(void); +extern ulong blanktime; +extern void setscreenimageclipr(Rectangle); +extern void drawflush(void); +extern int drawidletime(void); +extern QLock drawlock; + +/* vga.c */ +extern void vgascreenwin(VGAscr*); +extern void vgaimageinit(ulong); +extern void vgalinearpci(VGAscr*); +extern void vgalinearaddr(VGAscr*, ulong, int); + +extern void drawblankscreen(int); +extern void vgablank(VGAscr*, int); + +extern Lock vgascreenlock; + +#define ishwimage(i) (vgascreen[0].gscreendata && (i)->data->bdata == vgascreen[0].gscreendata->bdata) + +/* swcursor.c */ +void swcursorhide(void); +void swcursoravoid(Rectangle); +void swcursordraw(Point); +void swcursorload(Cursor *); +void swcursorinit(void); diff --git a/sys/src/9/xen/sdxen.c b/sys/src/9/xen/sdxen.c new file mode 100644 index 000000000..1973b5f8d --- /dev/null +++ b/sys/src/9/xen/sdxen.c @@ -0,0 +1,356 @@ +/* + * Xen block storage device frontend + * + * The present implementation follows the principle of + * "what's the simplest thing that could possibly work?". + * We can think about performance later. + * We can think about dynamically attaching and removing devices later. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +#define LOG(a) + +/* + * conversions to machine page numbers, pages and addresses + */ +#define MFN(pa) (patomfn[(pa)>>PGSHIFT]) +#define MFNPG(pa) (MFN(pa)<<PGSHIFT) +#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa)) +#define VA2MA(va) PA2MA(PADDR(va)) +#define VA2MFN(va) MFN(PADDR(va)) + +enum { + Ndevs = 4, + MajorDevSD = 0x800, + MajorDevHDA = 0x300, + MajorDevHDC = 0x1600, + MajorDevXVD = 0xCA00, +}; + +extern SDifc sdxenifc; + +typedef struct Ctlr Ctlr; + +struct Ctlr { + int online; + ulong secsize; + ulong sectors; + int backend; + int devid; + int evtchn; + blkif_front_ring_t ring; + int ringref; + Lock ringlock; + char *frame; + QLock iolock; + int iodone; + Rendez wiodone; +}; + +static int +ringinit(Ctlr *ctlr, char *a) +{ + blkif_sring_t *sr; + + sr = (blkif_sring_t*)a; + memset(sr, 0, BY2PG); + SHARED_RING_INIT(sr); + FRONT_RING_INIT(&ctlr->ring, sr, BY2PG); + ctlr->ringref = shareframe(ctlr->backend, sr, 1); + return BY2PG; +} + +static int +vbdsend(Ctlr *ctlr, int write, int ref, int nb, uvlong bno) +{ + blkif_request_t *req; + int i, notify; + + ilock(&ctlr->ringlock); // XXX conservative + i = ctlr->ring.req_prod_pvt; + req = RING_GET_REQUEST(&ctlr->ring, i); // XXX overflow? + + req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; + req->nr_segments = 1; + req->handle = ctlr->devid; + req->id = 1; + req->sector_number = bno; + req->seg[0].gref = ref; + req->seg[0].first_sect = 0; + req->seg[0].last_sect = nb-1; + + ctlr->ring.req_prod_pvt = i+1; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->ring, notify); + iunlock(&ctlr->ringlock); + return notify; +} + +static void +backendconnect(Ctlr *ctlr) +{ + char dir[64]; + char buf[64]; + + sprint(dir, "device/vbd/%d/", ctlr->devid); + xenstore_setd(dir, "ring-ref", ctlr->ringref); + xenstore_setd(dir, "event-channel", ctlr->evtchn); + xenstore_setd(dir, "state", XenbusStateInitialised); + xenstore_gets(dir, "backend", buf, sizeof buf); + sprint(dir, "%s/", buf); + HYPERVISOR_yield(); + xenstore_gets(dir, "state", buf, sizeof buf); + while (strtol(buf, 0, 0) != XenbusStateConnected) { + print("sdxen: waiting for vbd %d to connect\n", ctlr->devid); + tsleep(&up->sleep, return0, 0, 1000); + xenstore_gets(dir, "state", buf, sizeof buf); + } + xenstore_gets(dir, "sector-size", buf, sizeof buf); + ctlr->secsize = strtol(buf, 0, 0); + xenstore_gets(dir, "sectors", buf, sizeof buf); + ctlr->sectors = strtol(buf, 0, 0); + print("sdxen: backend %s secsize %ld sectors %ld\n", dir, ctlr->secsize, ctlr->sectors); + if (ctlr->secsize > BY2PG) + panic("sdxen: sector size bigger than mmu page size"); +} + +static void +backendactivate(Ctlr *ctlr) +{ + char dir[64]; + + sprint(dir, "device/vbd/%d/", ctlr->devid); + xenstore_setd(dir, "state", XenbusStateConnected); +} + +static SDev* +xenpnp(void) +{ + SDev *sdev[Ndevs]; + static char idno[Ndevs] = { '0', 'C', 'D', 'E' }; + static char nunit[Ndevs] = { 8, 2, 2, 8 }; + int i; + + for (i = 0; i < Ndevs; i++) { + sdev[i] = mallocz(sizeof(SDev), 1); + sdev[i]->ifc = &sdxenifc; + sdev[i]->idno = idno[i]; + sdev[i]->nunit = nunit[i]; + sdev[i]->ctlr = (Ctlr**)mallocz(sdev[i]->nunit*sizeof(Ctlr*), 1); + if (i > 0) + sdev[i]->next = sdev[i-1]; + } + return sdev[Ndevs-1]; +} + +static int +linuxdev(int idno, int subno) +{ + switch (idno) { + case '0': + return MajorDevSD + 16*subno; + case 'C': + return MajorDevHDA + 64*subno; + case 'D': + return MajorDevHDC + 64*subno; + case 'E': + return MajorDevXVD + 16*subno; + default: + return 0; + } +} + +static int +xenverify(SDunit *unit) +{ + Ctlr *ctlr; + char dir[64]; + char buf[64]; + int devid; + int npage; + char *p; + + if (unit->subno > unit->dev->nunit) + return 0; + devid = linuxdev(unit->dev->idno, unit->subno); + sprint(dir, "device/vbd/%d/", devid); + if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0) + return 0; + + ctlr = mallocz(sizeof(Ctlr), 1); + ((Ctlr**)unit->dev->ctlr)[unit->subno] = ctlr; + ctlr->devid = devid; + ctlr->backend = strtol(buf, 0, 0); + + npage = 2; + p = xspanalloc(npage<<PGSHIFT, BY2PG, 0); + p += ringinit(ctlr, p); + ctlr->frame = p; + ctlr->evtchn = xenchanalloc(ctlr->backend); + backendconnect(ctlr); + + unit->inquiry[0] = 0; // XXX how do we know if it's a CD? + unit->inquiry[2] = 2; + unit->inquiry[3] = 2; + unit->inquiry[4] = sizeof(unit->inquiry)-4; + strcpy((char*)&unit->inquiry[8], "Xen block device"); + + return 1; +} + +static int +wiodone(void *a) +{ + return ((Ctlr*)a)->iodone != 0; +} + +static void +sdxenintr(Ureg *, void *a) +{ + Ctlr *ctlr = a; + blkif_response_t *rsp; + int i, avail; + + ilock(&ctlr->ringlock); // XXX conservative + for (;;) { + RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->ring, avail); + if (!avail) + break; + i = ctlr->ring.rsp_cons; + rsp = RING_GET_RESPONSE(&ctlr->ring, i); + LOG(dprint("sdxen rsp %llud %d %d\n", rsp->id, rsp->operation, rsp->status);) + if (rsp->status == BLKIF_RSP_OKAY) + ctlr->iodone = 1; + else + ctlr->iodone = -1; + ctlr->ring.rsp_cons = ++i; + } + iunlock(&ctlr->ringlock); + if (ctlr->iodone) + wakeup(&ctlr->wiodone); +} + +static Ctlr *kickctlr; + +static void +kickme(void) +{ + Ctlr *ctlr = kickctlr; + shared_info_t *s; + + if (ctlr) { + s = HYPERVISOR_shared_info; + dprint("tick %d %d prod %d cons %d pending %x mask %x\n", + m->ticks, ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons, + s->evtchn_pending[0], s->evtchn_mask[0]); + sdxenintr(0, ctlr); + } +} + +static int +xenonline(SDunit *unit) +{ + Ctlr *ctlr; + + ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno]; + unit->sectors = ctlr->sectors; + unit->secsize = ctlr->secsize; + if (ctlr->online == 0) { + intrenable(ctlr->evtchn, sdxenintr, ctlr, BUSUNKNOWN, "vbd"); + //kickctlr = ctlr; + //addclock0link(kickme, 10000); + backendactivate(ctlr); + ctlr->online = 1; + } + + return 1; +} + +static int +xenrio(SDreq*) +{ + return -1; +} + +static long +xenbio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno) +{ + Ctlr *ctlr; + char *buf; + long bcount, len; + int ref; + int n; + + USED(lun); // XXX meaningless + ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno]; + LOG(("xenbio %c %lux %ld %lld\n", write? 'w' : 'r', (ulong)data, nb, bno);) + buf = data; + // XXX extra copying & fragmentation could be avoided by + // redefining sdmalloc() to get page-aligned buffers + if ((ulong)data&(BY2PG-1)) + buf = ctlr->frame; + bcount = BY2PG/unit->secsize; + qlock(&ctlr->iolock); + for (n = nb; n > 0; n -= bcount) { + ref = shareframe(ctlr->backend, buf, !write); + if (bcount > n) + bcount = n; + len = bcount*unit->secsize; + if (write && buf == ctlr->frame) + memmove(buf, data, len); + ctlr->iodone = 0; + if (vbdsend(ctlr, write, ref, bcount, bno)) + xenchannotify(ctlr->evtchn); + LOG(dprint("sleeping %d prod %d cons %d pending %x mask %x \n", ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons, + HYPERVISOR_shared_info->evtchn_pending[0], HYPERVISOR_shared_info->evtchn_mask[0]);) + sleep(&ctlr->wiodone, wiodone, ctlr); + xengrantend(ref); + if (ctlr->iodone < 0) { + qunlock(&ctlr->iolock); + return -1; + } + if (buf == ctlr->frame) { + if (!write) + memmove(data, buf, len); + data = (char*)data + len; + } else + buf += len; + bno += bcount; + } + qunlock(&ctlr->iolock); + return (nb-n)*unit->secsize; +} + +static void +xenclear(SDev *) +{ +} + +SDifc sdxenifc = { + "xen", /* name */ + + xenpnp, /* pnp */ + 0, /* legacy */ + 0, /* enable */ + 0, /* disable */ + + xenverify, /* verify */ + xenonline, /* online */ + xenrio, /* rio */ + 0, /* rctl */ + 0, /* wctl */ + + xenbio, /* bio */ + 0, /* probe */ + xenclear, /* clear */ + 0, /* stat */ +}; diff --git a/sys/src/9/xen/trap.c b/sys/src/9/xen/trap.c new file mode 100644 index 000000000..b47fc2c47 --- /dev/null +++ b/sys/src/9/xen/trap.c @@ -0,0 +1,1108 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" +#include <trace.h> + +#define INTRLOG(a) +#define SETUPLOG(a) +#define SYSCALLLOG(a) +#define FAULTLOG(a) +#define FAULTLOGFAST(a) +#define POSTNOTELOG(a) +#define TRAPLOG(a) + +int faultpanic = 0; + +enum { + /* trap_info_t flags */ + SPL0 = 0, + SPL3 = 3, + EvDisable = 4, +}; + +void noted(Ureg*, ulong); + +static void debugbpt(Ureg*, void*); +static void fault386(Ureg*, void*); +static void safe_fault386(Ureg*, void*); +static void doublefault(Ureg*, void*); +static void unexpected(Ureg*, void*); +static void _dumpstack(Ureg*); + +static Lock vctllock; +static Vctl *vctl[256]; + +enum +{ + Ntimevec = 20 /* number of time buckets for each intr */ +}; +ulong intrtimes[256][Ntimevec]; + +void +intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) +{ + int vno; + Vctl *v; + +/**/ + SETUPLOG(dprint("intrenable: irq %d, f %p, a %p, tbdf 0x%x, name %s\n", + irq, f, a, tbdf, name);) +/**/ + if(f == nil){ + print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n", + irq, tbdf, name); + return; + } + + v = xalloc(sizeof(Vctl)); + v->isintr = 1; + v->irq = irq; + v->tbdf = tbdf; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN-1); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + vno = arch->intrenable(v); + if(vno == -1){ + iunlock(&vctllock); + print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n", + irq, tbdf, v->name); + xfree(v); + return; + } + if(vctl[vno]){ + if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi) + panic("intrenable: handler: %s %s %p %p %p %p\n", + vctl[vno]->name, v->name, + vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi); + v->next = vctl[vno]; + } + vctl[vno] = v; + SETUPLOG(dprint("INTRENABLE: vctl[%d] is %p\n", vno, vctl[vno]);) + iunlock(&vctllock); +} + +int +intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name) +{ + Vctl **pv, *v; + int vno; + + /* + * For now, none of this will work with the APIC code, + * there is no mapping between irq and vector as the IRQ + * is pretty meaningless. + */ + if(arch->intrvecno == nil) + return -1; + vno = arch->intrvecno(irq); + ilock(&vctllock); + pv = &vctl[vno]; + while (*pv && + ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a || + strcmp((*pv)->name, name))) + pv = &((*pv)->next); + assert(*pv); + + v = *pv; + *pv = (*pv)->next; /* Link out the entry */ + + if(vctl[vno] == nil && arch->intrdisable != nil) + arch->intrdisable(irq); + iunlock(&vctllock); + xfree(v); + return 0; +} + +static long +irqallocread(Chan*, void *vbuf, long n, vlong offset) +{ + char *buf, *p, str[2*(11+1)+KNAMELEN+1+1]; + int m, vno; + long oldn; + Vctl *v; + + if(n < 0 || offset < 0) + error(Ebadarg); + + oldn = n; + buf = vbuf; + for(vno=0; vno<nelem(vctl); vno++){ + for(v=vctl[vno]; v; v=v->next){ + m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name); + if(m <= offset) /* if do not want this, skip entry */ + offset -= m; + else{ + /* skip offset bytes */ + m -= offset; + p = str+offset; + offset = 0; + + /* write at most max(n,m) bytes */ + if(m > n) + m = n; + memmove(buf, p, m); + n -= m; + buf += m; + + if(n == 0) + return oldn; + } + } + } + return oldn - n; +} + +void +trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name) +{ + Vctl *v; + + if(vno < 0 || vno >= VectorPIC) + panic("trapenable: vno %d\n", vno); + v = xalloc(sizeof(Vctl)); + v->tbdf = BUSUNKNOWN; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN); + v->name[KNAMELEN-1] = 0; + + lock(&vctllock); + if(vctl[vno]) + v->next = vctl[vno]->next; + vctl[vno] = v; + unlock(&vctllock); +} + +static void +nmienable(void) +{ + /* leave this here in case plan 9 ever makes it to dom0 */ +#ifdef NOWAY + /* + * Hack: should be locked with NVRAM access. + */ + outb(0x70, 0x80); /* NMI latch clear */ + outb(0x70, 0); + + x = inb(0x61) & 0x07; /* Enable NMI */ + outb(0x61, 0x08|x); + outb(0x61, x); +#endif +} + +/* we started out doing the 'giant bulk init' for all traps. + * we're going to do them one-by-one since error analysis is + * so much easier that way. + */ +void +trapinit(void) +{ + trap_info_t t[2]; + ulong vaddr; + int v, flag; + + HYPERVISOR_set_callbacks( + KESEL, (ulong)hypervisor_callback, + KESEL, (ulong)failsafe_callback); + + /* XXX rework as single hypercall once debugged */ + t[1].address = 0; + vaddr = (ulong)vectortable; + for(v = 0; v < 256; v++){ + switch(v){ + case VectorBPT: + case VectorSYSCALL: + flag = SPL3 | EvDisable; + break; + default: + flag = SPL0 | EvDisable; + break; + } + t[0] = (trap_info_t){ v, flag, KESEL, vaddr }; + if(HYPERVISOR_set_trap_table(t) < 0) + panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n", + t[0].vector, t[0].flags, t[0].cs, t[0].address); + vaddr += 6; + } + + /* + * Special traps. + * Syscall() is called directly without going through trap(). + */ + trapenable(VectorBPT, debugbpt, 0, "debugpt"); + trapenable(VectorPF, fault386, 0, "fault386"); + trapenable(Vector2F, doublefault, 0, "doublefault"); + trapenable(Vector15, unexpected, 0, "unexpected"); + + nmienable(); + addarchfile("irqalloc", 0444, irqallocread, nil); +} + +static char* excname[32] = { + "divide error", + "debug exception", + "nonmaskable interrupt", + "breakpoint", + "overflow", + "bounds check", + "invalid opcode", + "coprocessor not available", + "double fault", + "coprocessor segment overrun", + "invalid TSS", + "segment not present", + "stack exception", + "general protection violation", + "page fault", + "15 (reserved)", + "coprocessor error", + "alignment check", + "machine check", + "19 (reserved)", + "20 (reserved)", + "21 (reserved)", + "22 (reserved)", + "23 (reserved)", + "24 (reserved)", + "25 (reserved)", + "26 (reserved)", + "27 (reserved)", + "28 (reserved)", + "29 (reserved)", + "30 (reserved)", + "31 (reserved)", +}; + +/* + * keep histogram of interrupt service times + */ +void +intrtime(Mach*, int vno) +{ + ulong diff; + ulong x; + + x = perfticks(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + + diff /= m->cpumhz*100; // quantum = 100µsec + if(diff >= Ntimevec) + diff = Ntimevec-1; + intrtimes[vno][diff]++; +} + +/* go to user space */ +void +kexit(Ureg*) +{ + uvlong t; + Tos *tos; + + /* precise time accounting, kernel exit */ + tos = (Tos*)(USTKTOP-sizeof(Tos)); + cycles(&t); + tos->kcycles += t - up->kentry; + tos->pcycles = up->pcycles; + tos->pid = up->pid; + INTRLOG(dprint("leave kexit, TOS %p\n", tos);) +} + +/* + * All traps come here. It is slower to have all traps call trap() + * rather than directly vectoring the handler. However, this avoids a + * lot of code duplication and possible bugs. The only exception is + * VectorSYSCALL. + * Trap is called with interrupts (and events) disabled via interrupt-gates. + */ +void +trap(Ureg* ureg) +{ + int clockintr, i, vno, user; + char buf[ERRMAX]; + Vctl *ctl, *v; + Mach *mach; + + TRAPLOG(dprint("trap ureg %lux %lux\n", (ulong*)ureg, ureg->trap);) + m->perf.intrts = perfticks(); + user = (ureg->cs & 0xFFFF) == UESEL; + if(user){ + up->dbgreg = ureg; + cycles(&up->kentry); + } + + clockintr = 0; + + vno = ureg->trap; + if(vno < 0 || vno >= 256) + panic("bad interrupt number %d\n", vno); + TRAPLOG(dprint("trap: vno is 0x%x, vctl[%d] is %p\n", vno, vno, vctl[vno]);) + if(ctl = vctl[vno]){ + INTRLOG(dprint("ctl is %p, isintr is %d\n", ctl, ctl->isintr);) + if(ctl->isintr){ + m->intr++; + if(vno >= VectorPIC && vno != VectorSYSCALL) + m->lastintr = ctl->irq; + } + + INTRLOG(dprint("ctl %p, isr %p\n", ctl, ctl->isr);) + if(ctl->isr) + ctl->isr(vno); + for(v = ctl; v != nil; v = v->next){ + INTRLOG(dprint("ctl %p, f is %p\n", v, v->f);) + if(v->f) + v->f(ureg, v->a); + } + INTRLOG(dprint("ctl %p, eoi %p\n", ctl, ctl->eoi);) + if(ctl->eoi) + ctl->eoi(vno); + + if(ctl->isintr){ + intrtime(m, vno); + + //if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) + if (ctl->tbdf != BUSUNKNOWN && ctl->irq == VIRQ_TIMER) + clockintr = 1; + + if(up && !clockintr) + preempted(); + } + } + else if(vno <= nelem(excname) && user){ + spllo(); + sprint(buf, "sys: trap: %s", excname[vno]); + postnote(up, 1, buf, NDebug); + } + else if(vno >= VectorPIC && vno != VectorSYSCALL){ + /* + * An unknown interrupt. + * Check for a default IRQ7. This can happen when + * the IRQ input goes away before the acknowledge. + * In this case, a 'default IRQ7' is generated, but + * the corresponding bit in the ISR isn't set. + * In fact, just ignore all such interrupts. + */ + + /* call all interrupt routines, just in case */ + for(i = VectorPIC; i <= MaxIrqLAPIC; i++){ + ctl = vctl[i]; + if(ctl == nil) + continue; + if(!ctl->isintr) + continue; + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + /* should we do this? */ + if(ctl->eoi) + ctl->eoi(i); + } + + iprint("cpu%d: spurious interrupt %d, last %d\n", + m->machno, vno, m->lastintr); + if(0)if(conf.nmach > 1){ + for(i = 0; i < 32; i++){ + if(!(active.machs & (1<<i))) + continue; + mach = MACHP(i); + if(m->machno == mach->machno) + continue; + print(" cpu%d: last %d", + mach->machno, mach->lastintr); + } + print("\n"); + } + m->spuriousintr++; + if(user) + kexit(ureg); + return; + } + else{ + if(vno == VectorNMI){ + nmienable(); + if(m->machno != 0){ + print("cpu%d: PC %8.8luX\n", + m->machno, ureg->pc); + for(;;); + } + } + dumpregs(ureg); + if(!user){ + ureg->sp = (ulong)&ureg->sp; + _dumpstack(ureg); + } + if(vno < nelem(excname)) + panic("%s", excname[vno]); + panic("unknown trap/intr: %d\n", vno); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + INTRLOG(dprint("calling sched in trap? \n");) + sched(); + INTRLOG(dprint("Back from calling sched in trap?\n");) + splhi(); + } + + if(user){ + if(up->procctl || up->nnote) + notify(ureg); + kexit(ureg); + } + + if (ureg->trap == 0xe) { + /* + * on page fault, we need to restore the old spl + * Xen won't do it for us. + * XXX verify this. + */ + if (ureg->flags & 0x200) + spllo(); + } +} + +void +dumpregs2(Ureg* ureg) +{ + if(up) + print("cpu%d: registers for %s %lud\n", + m->machno, up->text, up->pid); + else + print("cpu%d: registers for kernel\n", m->machno); + print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX", + ureg->flags, ureg->trap, ureg->ecode, ureg->pc); + print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp); + print(" AX %8.8luX BX %8.8luX CX %8.8luX DX %8.8luX\n", + ureg->ax, ureg->bx, ureg->cx, ureg->dx); + print(" SI %8.8luX DI %8.8luX BP %8.8luX\n", + ureg->si, ureg->di, ureg->bp); + print(" CS %4.4luX DS %4.4luX ES %4.4luX FS %4.4luX GS %4.4luX\n", + ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF, + ureg->fs & 0xFFFF, ureg->gs & 0xFFFF); +} + +void +dumpregs(Ureg* ureg) +{ + extern ulong etext; + + dumpregs2(ureg); + + /* + * Processor control registers. + * If machine check exception, time stamp counter, page size extensions + * or enhanced virtual 8086 mode extensions are supported, there is a + * CR4. If there is a CR4 and machine check extensions, read the machine + * check address and machine check type registers if RDMSR supported. + */ + print("SKIPPING get of crx and other such stuff.\n");/* */ +#ifdef NOT + print(" CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux", + getcr0(), getcr2(), getcr3()); + if(m->cpuiddx & 0x9A){ + print(" CR4 %8.8lux", getcr4()); + if((m->cpuiddx & 0xA0) == 0xA0){ + rdmsr(0x00, &mca); + rdmsr(0x01, &mct); + print("\n MCA %8.8llux MCT %8.8llux", mca, mct); + } + } +#endif + print("\n ur %lux up %lux\n", (ulong)ureg, (ulong)up); +} + + +/* + * Fill in enough of Ureg to get a stack trace, and call a function. + * Used by debugging interface rdb. + */ +void +callwithureg(void (*fn)(Ureg*)) +{ + Ureg ureg; + ureg.pc = getcallerpc(&fn); + ureg.sp = (ulong)&fn; + fn(&ureg); +} + +static void +_dumpstack(Ureg *ureg) +{ + ulong l, v, i, estack; + extern ulong etext; + int x; + + if(getconf("*nodumpstack")){ + iprint("dumpstack disabled\n"); + return; + } + iprint("dumpstack\n"); + x = 0; + x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp); + i = 0; + if(up + && (ulong)&l >= (ulong)up->kstack + && (ulong)&l <= (ulong)up->kstack+KSTACK) + estack = (ulong)up->kstack+KSTACK; + else if((ulong)&l >= (ulong)m->stack + && (ulong)&l <= (ulong)m+BY2PG) + estack = (ulong)m+MACHSIZE; + else + return; + x += print("estackx %.8lux\n", estack); + + for(l=(ulong)&l; l<estack; l+=4){ + v = *(ulong*)l; + if((KTZERO < v && v < (ulong)&etext) || estack-l<32){ + /* + * we could Pick off general CALL (((uchar*)v)[-5] == 0xE8) + * and CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0), + * but this is too clever and misses faulting address. + */ + x += print("%.8lux=%.8lux ", l, v); + i++; + } + if(i == 4){ + i = 0; + x += print("\n"); + } + } + if(i) + print("\n"); + print("EOF\n"); +} + +void +dumpstack(void) +{ + callwithureg(_dumpstack); +} + +static void +debugbpt(Ureg* ureg, void*) +{ + char buf[ERRMAX]; + print("debugbpt\n"); + if(up == 0) + panic("kernel bpt"); + /* restore pc to instruction that caused the trap */ + ureg->pc--; + sprint(buf, "sys: breakpoint"); + postnote(up, 1, buf, NDebug); + print("debugbpt for proc %lud\n", up->pid); +} + +static void +doublefault(Ureg*, void*) +{ + panic("double fault"); +} + +static void +unexpected(Ureg* ureg, void*) +{ + print("unexpected trap %lud; ignoring\n", ureg->trap); +} + +static void +fault386(Ureg* ureg, void* ) +{ + ulong addr; + int read, user, n, insyscall; + char buf[ERRMAX]; + + addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2; + if (faultpanic) { + dprint("cr2 is 0x%lx\n", addr); + //dumpregs(ureg); + dumpstack(); + panic("fault386"); + exit(1); + } + + user = (ureg->cs & 0xFFFF) == UESEL; + if(!user && mmukmapsync(addr)) + return; + read = !(ureg->ecode & 2); + if(up == nil) + panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr); + insyscall = up->insyscall; + up->insyscall = 1; + n = fault(addr, read); + if(n < 0){ + if(!user){ + dumpregs(ureg); + panic("fault: 0x%lux\n", addr); + } + sprint(buf, "sys: trap: fault %s addr=0x%lux", + read? "read" : "write", addr); + dprint("Posting %s to %lud\n", buf, up->pid); + postnote(up, 1, buf, NDebug); + } + up->insyscall = insyscall; + FAULTLOG(dprint("fault386: all done\n");) +} + +/* + * system calls + */ +#include "../port/systab.h" + +/* + * Syscall is called directly from assembler without going through trap(). + */ +void +syscall(Ureg* ureg) +{ + char *e; + ulong sp; + long ret; + int i, s; + ulong scallnr; + + SYSCALLLOG(dprint("%d: syscall ...#%ld(%s)\n", + up->pid, ureg->ax, sysctab[ureg->ax]);) + + if((ureg->cs & 0xFFFF) != UESEL) + panic("syscall: cs 0x%4.4luX\n", ureg->cs); + + cycles(&up->kentry); + + m->syscall++; + up->insyscall = 1; + up->pc = ureg->pc; + up->dbgreg = ureg; + + if(up->procctl == Proc_tracesyscall){ + up->procctl = Proc_stopme; + procctl(up); + } + + scallnr = ureg->ax; + up->scallnr = scallnr; + if(scallnr == RFORK && up->fpstate == FPactive){ + fpsave(&up->fpsave); + up->fpstate = FPinactive; + } + spllo(); + + sp = ureg->usp; + up->nerrlab = 0; + ret = -1; + if(!waserror()){ + if(scallnr >= nsyscall || systab[scallnr] == 0){ + pprint("bad sys call number %lud pc %lux\n", + scallnr, ureg->pc); + postnote(up, 1, "sys: bad sys call", NDebug); + error(Ebadarg); + } + + if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD)) + validaddr(sp, sizeof(Sargs)+BY2WD, 0); + + up->s = *((Sargs*)(sp+BY2WD)); + up->psstate = sysctab[scallnr]; + + ret = systab[scallnr]((va_list)up->s.args); + poperror(); + }else{ + /* failure: save the error buffer for errstr */ + e = up->syserrstr; + up->syserrstr = up->errstr; + up->errstr = e; + if(0 && up->pid == 1) + print("syscall %lud error %s\n", scallnr, up->syserrstr); + } + if(up->nerrlab){ + print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab); + for(i = 0; i < NERR; i++) + print("sp=%lux pc=%lux\n", + up->errlab[i].sp, up->errlab[i].pc); + panic("error stack"); + } + + SYSCALLLOG(dprint("%d: Syscall %d returns %d, ureg %p\n", up->pid, scallnr, ret, ureg);) + /* + * Put return value in frame. On the x86 the syscall is + * just another trap and the return value from syscall is + * ignored. On other machines the return value is put into + * the results register by caller of syscall. + */ + ureg->ax = ret; + + if(up->procctl == Proc_tracesyscall){ + up->procctl = Proc_stopme; + s = splhi(); + procctl(up); + splx(s); + } + + up->insyscall = 0; + up->psstate = 0; + INTRLOG(dprint("cleared insyscall\n");) + if(scallnr == NOTED) + noted(ureg, *(ulong*)(sp+BY2WD)); + + if(scallnr!=RFORK && (up->procctl || up->nnote)){ + splhi(); + notify(ureg); + } + /* if we delayed sched because we held a lock, sched now */ + if(up->delaysched) + sched(); + INTRLOG(dprint("before kexit\n");) + kexit(ureg); +} + +/* + * Call user, if necessary, with note. + * Pass user the Ureg struct and the note on his stack. + */ +int +notify(Ureg* ureg) +{ + int l; + ulong s, sp; + Note *n; + + if(up->procctl) + procctl(up); + if(up->nnote == 0) + return 0; + + if(up->fpstate == FPactive){ + fpsave(&up->fpsave); + up->fpstate = FPinactive; + } + up->fpstate |= FPillegal; + + s = spllo(); + qlock(&up->debug); + up->notepending = 0; + n = &up->note[0]; + if(strncmp(n->msg, "sys:", 4) == 0){ + l = strlen(n->msg); + if(l > ERRMAX-15) /* " pc=0x12345678\0" */ + l = ERRMAX-15; + sprint(n->msg+l, " pc=0x%.8lux", ureg->pc); + } + + if(n->flag!=NUser && (up->notified || up->notify==0)){ + if(n->flag == NDebug) + pprint("suicide: %s\n", n->msg); + qunlock(&up->debug); + pexit(n->msg, n->flag!=NDebug); + } + + if(up->notified){ + qunlock(&up->debug); + splhi(); + return 0; + } + + if(!up->notify){ + qunlock(&up->debug); + pexit(n->msg, n->flag!=NDebug); + } + sp = ureg->usp; + sp -= sizeof(Ureg); + + if(!okaddr((ulong)up->notify, 1, 0) + || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){ + pprint("suicide: bad address in notify\n"); + qunlock(&up->debug); + pexit("Suicide", 0); + } + + up->ureg = (void*)sp; + memmove((Ureg*)sp, ureg, sizeof(Ureg)); + *(Ureg**)(sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */ + up->ureg = (void*)sp; + sp -= BY2WD+ERRMAX; + memmove((char*)sp, up->note[0].msg, ERRMAX); + sp -= 3*BY2WD; + *(ulong*)(sp+2*BY2WD) = sp+3*BY2WD; /* arg 2 is string */ + *(ulong*)(sp+1*BY2WD) = (ulong)up->ureg; /* arg 1 is ureg* */ + *(ulong*)(sp+0*BY2WD) = 0; /* arg 0 is pc */ + ureg->usp = sp; + ureg->pc = (ulong)up->notify; + up->notified = 1; + up->nnote--; + memmove(&up->lastnote, &up->note[0], sizeof(Note)); + memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); + + qunlock(&up->debug); + splx(s); + return 1; +} + +/* + * Return user to state before notify() + */ +void +noted(Ureg* ureg, ulong arg0) +{ + Ureg *nureg; + ulong oureg, sp; + + qlock(&up->debug); + if(arg0!=NRSTR && !up->notified) { + qunlock(&up->debug); + pprint("call to noted() when not notified\n"); + pexit("Suicide", 0); + } + up->notified = 0; + + nureg = up->ureg; /* pointer to user returned Ureg struct */ + + up->fpstate &= ~FPillegal; + + /* sanity clause */ + oureg = (ulong)nureg; + if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){ + pprint("bad ureg in noted or call to noted when not notified\n"); + qunlock(&up->debug); + pexit("Suicide", 0); + } + + /* + * Check the segment selectors are all valid, otherwise + * a fault will be taken on attempting to return to the + * user process. + * Take care with the comparisons as different processor + * generations push segment descriptors in different ways. + */ + if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL + || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL + || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){ + pprint("bad segment selector in noted\n"); + pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL); + pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL); + pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n", + ureg->es, ureg->fs, ureg->gs, UDSEL); + pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL); + qunlock(&up->debug); + pexit("Suicide", 0); + } + + /* don't let user change system flags */ + nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5); + + memmove(ureg, nureg, sizeof(Ureg)); + + switch(arg0){ + case NCONT: + case NRSTR: + if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted\n"); + pexit("Suicide", 0); + } + up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD)); + qunlock(&up->debug); + break; + + case NSAVE: + if(!okaddr(nureg->pc, BY2WD, 0) + || !okaddr(nureg->usp, BY2WD, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted\n"); + pexit("Suicide", 0); + } + qunlock(&up->debug); + sp = oureg-4*BY2WD-ERRMAX; + splhi(); + ureg->sp = sp; + ((ulong*)sp)[1] = oureg; /* arg 1 0(FP) is ureg* */ + ((ulong*)sp)[0] = 0; /* arg 0 is pc */ + break; + + default: + pprint("unknown noted arg 0x%lux\n", arg0); + up->lastnote.flag = NDebug; + /* fall through */ + + case NDFLT: + if(up->lastnote.flag == NDebug){ + qunlock(&up->debug); + pprint("suicide: %s\n", up->lastnote.msg); + } else + qunlock(&up->debug); + pexit(up->lastnote.msg, up->lastnote.flag!=NDebug); + } +} + +uintptr +execregs(uintptr entry, ulong ssize, ulong nargs) +{ + ulong *sp; + Ureg *ureg; + + up->fpstate = FPinit; + fpoff(); + + sp = (ulong*)(USTKTOP - ssize); + *--sp = nargs; + + ureg = up->dbgreg; + ureg->usp = (ulong)sp; + ureg->pc = entry; +// print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos)); + return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */ +} + +/* + * return the userpc the last exception happened at + */ +ulong +userpc(void) +{ + Ureg *ureg; + + ureg = (Ureg*)up->dbgreg; + return ureg->pc; +} + +/* This routine must save the values of registers the user is not permitted + * to write from devproc and then restore the saved values before returning. + */ +void +setregisters(Ureg* ureg, char* pureg, char* uva, int n) +{ + ulong flags; + ulong cs; + ulong ss; + + flags = ureg->flags; + cs = ureg->cs; + ss = ureg->ss; + memmove(pureg, uva, n); + ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00); + ureg->cs = cs; + ureg->ss = ss; +} + +static void +linkproc(void) +{ + spllo(); + up->kpfun(up->kparg); + pexit("kproc dying", 0); +} + +void +kprocchild(Proc* p, void (*func)(void*), void* arg) +{ + /* + * gotolabel() needs a word on the stack in + * which to place the return PC used to jump + * to linkproc(). + */ + p->sched.pc = (ulong)linkproc; + p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD; + + p->kpfun = func; + p->kparg = arg; +} + +void +forkchild(Proc *p, Ureg *ureg) +{ + Ureg *cureg; + + /* + * Add 2*BY2WD to the stack to account for + * - the return PC + * - trap's argument (ur) + */ + p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD); + p->sched.pc = (ulong)forkret; + + cureg = (Ureg*)(p->sched.sp+2*BY2WD); + memmove(cureg, ureg, sizeof(Ureg)); + /* return value of syscall in child */ + cureg->ax = 0; + + /* Things from bottom of syscall which were never executed */ + p->psstate = 0; + p->insyscall = 0; +} + +/* Give enough context in the ureg to produce a kernel stack for + * a sleeping process + */ +void +setkernur(Ureg* ureg, Proc* p) +{ + ureg->pc = p->sched.pc; + ureg->sp = p->sched.sp+4; +} + +ulong +dbgpc(Proc *p) +{ + Ureg *ureg; + + ureg = p->dbgreg; + if(ureg == 0) + return 0; + + return ureg->pc; +} + +/* + * install_safe_pf_handler / install_normal_pf_handler: + * + * These are used within the failsafe_callback handler in entry.S to avoid + * taking a full page fault when reloading FS and GS. This is because FS and + * GS could be invalid at pretty much any point while Xenolinux executes (we + * don't set them to safe values on entry to the kernel). At *any* point Xen + * may be entered due to a hardware interrupt --- on exit from Xen an invalid + * FS/GS will cause our failsafe_callback to be executed. This could occur, + * for example, while the mmu_update_queue is in an inconsistent state. This + * is disastrous because the normal page-fault handler touches the update + * queue! + * + * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS + * to zero if they cannot be reloaded -- at this point executing a normal + * page fault would not change this effect. The safe page-fault handler + * ensures this end result (blow away the selector value) without the dangers + * of the normal page-fault handler. + * + * NB. Perhaps this can all go away after we have implemented writeable + * page tables. :-) + */ +static void +safe_fault386(Ureg* , void* ) { + panic("DO SAFE PAGE FAULT!\n"); + + + +} + +unsigned long install_safe_pf_handler(void) +{ + dprint("called from failsafe callback\n"); + trapenable(VectorPF, safe_fault386, 0, "safe_fault386"); + return 0; +} + +void install_normal_pf_handler(unsigned long) +{ + trapenable(VectorPF, fault386, 0, "fault386"); +} diff --git a/sys/src/9/xen/uartxen.c b/sys/src/9/xen/uartxen.c new file mode 100644 index 000000000..31e9b8ef3 --- /dev/null +++ b/sys/src/9/xen/uartxen.c @@ -0,0 +1,334 @@ +/* + * xencons.c + * Access to xen consoles. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../pc/io.h" + +extern PhysUart xenphysuart; + +static Uart xenuart = { + .name = "xencons", + .freq = 1843200, + .phys = &xenphysuart, +}; + +struct { + struct xencons_interface *intf; + int evtchn; + Lock txlock; +} xencons; + +/* + * Debug print to xen "emergency console". + * Output only appears if xen is built with verbose=y + */ +void +dprint(char *fmt, ...) +{ + int n; + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + HYPERVISOR_console_io(CONSOLEIO_write, n, buf); +} + +static void kick(Uart*); +/* + * Emit a string to the guest OS console, bypassing the queue + * - before serialoq is initialised + * - when rdb is activated + * - from iprint() for messages from interrupt routines + * If ring is full, just throw extra output away. + */ +void +xenuartputs(char *s, int n) +{ + struct xencons_interface *con = xencons.intf; + unsigned long prod; + int c; + + ilock(&xencons.txlock); + prod = con->out_prod; + while (n-- > 0 && (prod - con->out_cons) < sizeof(con->out)) { + c = *s++; + /* + if (c == '\n') + con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r'; + */ + con->out[MASK_XENCONS_IDX(prod++, con->out)] = c; + } + coherence(); + con->out_prod = prod; + xenchannotify(xencons.evtchn); + iunlock(&xencons.txlock); +} + +/* + * Handle channel event from console + */ +static void +interrupt(Ureg*, void *arg) +{ + char c; + unsigned long cons; + Uart *uart; + struct xencons_interface *con = xencons.intf; + + uart = &xenuart; + + cons = con->in_cons; + coherence(); + while (cons != con->in_prod) { + c = con->in[MASK_XENCONS_IDX(cons++, con->in)]; + uartrecv(uart, c & 0xFF); + } + coherence(); + con->in_cons = cons; + kick(nil); +} + +static Uart* +pnp(void) +{ + return &xenuart; +} + +static void +enable(Uart*, int ie) +{ + if(ie) + intrenable(xencons.evtchn, interrupt, 0, BUSUNKNOWN, "Xen console"); +} + +static void +disable(Uart*) +{ +} + +/* + * Send queued output to guest OS console + */ +static void +kick(Uart*) +{ + struct xencons_interface *con = xencons.intf; + unsigned long prod; + long avail, idx, n, m; + + ilock(&xencons.txlock); + prod = con->out_prod; + avail = sizeof(con->out) - (prod - con->out_cons); + while (avail > 0) { + idx = MASK_XENCONS_IDX(prod, con->out); + m = sizeof(con->out) - idx; + if (m > avail) + m = avail; + n = qconsume(serialoq, con->out+idx, m); + if (n < 0) + break; + prod += n; + avail -= n; + } + coherence(); + con->out_prod = prod; + xenchannotify(xencons.evtchn); + iunlock(&xencons.txlock); +} + +static void +donothing(Uart*, int) +{ +} + +static int +donothingint(Uart*, int) +{ + return 0; +} + +static int +baud(Uart *uart, int n) +{ + if(n <= 0) + return -1; + + uart->baud = n; + return 0; +} + +static int +bits(Uart *uart, int n) +{ + switch(n){ + case 7: + case 8: + break; + default: + return -1; + } + + uart->bits = n; + return 0; +} + +static int +stop(Uart *uart, int n) +{ + if(n != 1) + return -1; + uart->stop = n; + return 0; +} + +static int +parity(Uart *uart, int n) +{ + if(n != 'n') + return -1; + uart->parity = n; + return 0; +} + +static long +status(Uart *uart, void *buf, long n, long offset) +{ + char *p; + + p = malloc(READSTR); + if(p == nil) + error(Enomem); + snprint(p, READSTR, + "b%d\n" + "dev(%d) type(%d) framing(%d) overruns(%d) " + "berr(%d) serr(%d)\n", + + uart->baud, + uart->dev, + uart->type, + uart->ferr, + uart->oerr, + uart->berr, + uart->serr + ); + n = readstr(offset, buf, n, p); + free(p); + + return n; +} + +void +xenputc(Uart*, int c) +{ + struct xencons_interface *con = xencons.intf; + unsigned long prod; + + c &= 0xFF; + + ilock(&xencons.txlock); + /* + while(con->out_cons == con->out_prod) + HYPERVISOR_yield(); + */ + if(con->out_cons == con->out_prod){ + iunlock(&xencons.txlock); + return; + } + + prod = con->out_prod; + + if((con->out[MASK_XENCONS_IDX(prod++, con->out)] = c) == '\n') + con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r'; + + coherence(); + con->out_prod = prod; + xenchannotify(xencons.evtchn); + iunlock(&xencons.txlock); +} + +int +xengetc(Uart*) +{ + struct xencons_interface *con = xencons.intf; + char c; + + c = 0; + + if(con->in_cons != con->in_prod){ + coherence(); + c = con->in[MASK_XENCONS_IDX(con->in_cons++, con->in)]; + if (con->in_cons == con->in_prod) + xenchannotify(xencons.evtchn); + } + + return c; +} + +PhysUart xenphysuart = { + .name = "xenuart", + + .pnp = pnp, + .enable = enable, + .disable = disable, + .kick = kick, + .dobreak = donothing, + .baud = baud, + .bits = bits, + .stop = stop, + .parity = parity, + .modemctl = donothing, + .rts = donothing, + .dtr = donothing, + .status = status, + .fifo = donothing, + + .getc = xengetc, + .putc = xenputc, +}; + +/* console=0 to enable */ +void +xenconsinit(void) +{ + xencons.intf = (struct xencons_interface*)mmumapframe(XENCONSOLE, xenstart->console_mfn); + xencons.evtchn = xenstart->console_evtchn; + + consuart = &xenuart; +} + +void +kbdenable(void) +{ + Uart *uart; + int n; + char *p, *cmd; + + if((p = getconf("console")) == nil) + return; + n = strtoul(p, &cmd, 0); + if(p == cmd || n != 0) + return; + uart = &xenuart; + + (*uart->phys->enable)(uart, 0); + uartctl(uart, "b9600 l8 pn s1"); + if(*cmd != '\0') + uartctl(uart, cmd); + + consuart = uart; + uart->console = 1; + + uartputs("CONSOLE1\n", 9); + + //*(char*)0 = 0; +} + diff --git a/sys/src/9/xen/xen-public/COPYING b/sys/src/9/xen/xen-public/COPYING new file mode 100644 index 000000000..ffc6d6166 --- /dev/null +++ b/sys/src/9/xen/xen-public/COPYING @@ -0,0 +1,38 @@ +XEN NOTICE +========== + +This copyright applies to all files within this subdirectory and its +subdirectories: + include/public/*.h + include/public/hvm/*.h + include/public/io/*.h + +The intention is that these files can be freely copied into the source +tree of an operating system when porting that OS to run on Xen. Doing +so does *not* cause the OS to become subject to the terms of the GPL. + +All other files in the Xen source distribution are covered by version +2 of the GNU General Public License except where explicitly stated +otherwise within individual source files. + + -- Keir Fraser (on behalf of the Xen team) + +===================================================================== + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/sys/src/9/xen/xen-public/arch-arm.h b/sys/src/9/xen/xen-public/arch-arm.h new file mode 100644 index 000000000..8aa62d373 --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-arm.h @@ -0,0 +1,252 @@ +/****************************************************************************** + * arch-arm.h + * + * Guest OS interface to ARM Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright 2011 (C) Citrix Systems + */ + +#ifndef __XEN_PUBLIC_ARCH_ARM_H__ +#define __XEN_PUBLIC_ARCH_ARM_H__ + +/* hypercall calling convention + * ---------------------------- + * + * A hypercall is issued using the ARM HVC instruction. + * + * A hypercall can take up to 5 arguments. These are passed in + * registers, the first argument in x0/r0 (for arm64/arm32 guests + * respectively irrespective of whether the underlying hypervisor is + * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2, + * the forth in x3/r3 and the fifth in x4/r4. + * + * The hypercall number is passed in r12 (arm) or x16 (arm64). In both + * cases the relevant ARM procedure calling convention specifies this + * is an inter-procedure-call scratch register (e.g. for use in linker + * stubs). This use does not conflict with use during a hypercall. + * + * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG. + * + * The return value is in x0/r0. + * + * The hypercall will clobber x16/r12 and the argument registers used + * by that hypercall (except r0 which is the return value) i.e. in + * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a + * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3. + * + * Parameter structs passed to hypercalls are laid out according to + * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA + * EABI) and Procedure Call Standard for the ARM 64-bit Architecture + * (AAPCS64). Where there is a conflict the 64-bit standard should be + * used regardless of guest type. Structures which are passed as + * hypercall arguments are always little endian. + */ + +#define XEN_HYPERCALL_TAG 0XEA1 + +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) + +#ifndef __ASSEMBLY__ +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef union { type *p; unsigned long q; } \ + __guest_handle_ ## name; \ + typedef union { type *p; uint64_aligned_t q; } \ + __guest_handle_64_ ## name; + +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. On ARM is always 8 bytes sizes and 8 bytes + * aligned. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. It is 4 bytes on aarch and 8 bytes on aarch64. + */ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +/* this is going to be changed on 64 bit */ +#define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name +#define set_xen_guest_handle_raw(hnd, val) \ + do { \ + typeof(&(hnd)) _sxghr_tmp = &(hnd); \ + _sxghr_tmp->q = 0; \ + _sxghr_tmp->p = val; \ + } while ( 0 ) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */ +# define __DECL_REG(n64, n32) union { \ + uint64_t n64; \ + uint32_t n32; \ + } +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */ +#define __DECL_REG(n64, n32) uint64_t n64 +#endif + +struct vcpu_guest_core_regs +{ + /* Aarch64 Aarch32 */ + __DECL_REG(x0, r0_usr); + __DECL_REG(x1, r1_usr); + __DECL_REG(x2, r2_usr); + __DECL_REG(x3, r3_usr); + __DECL_REG(x4, r4_usr); + __DECL_REG(x5, r5_usr); + __DECL_REG(x6, r6_usr); + __DECL_REG(x7, r7_usr); + __DECL_REG(x8, r8_usr); + __DECL_REG(x9, r9_usr); + __DECL_REG(x10, r10_usr); + __DECL_REG(x11, r11_usr); + __DECL_REG(x12, r12_usr); + + __DECL_REG(x13, sp_usr); + __DECL_REG(x14, lr_usr); + + __DECL_REG(x15, __unused_sp_hyp); + + __DECL_REG(x16, lr_irq); + __DECL_REG(x17, sp_irq); + + __DECL_REG(x18, lr_svc); + __DECL_REG(x19, sp_svc); + + __DECL_REG(x20, lr_abt); + __DECL_REG(x21, sp_abt); + + __DECL_REG(x22, lr_und); + __DECL_REG(x23, sp_und); + + __DECL_REG(x24, r8_fiq); + __DECL_REG(x25, r9_fiq); + __DECL_REG(x26, r10_fiq); + __DECL_REG(x27, r11_fiq); + __DECL_REG(x28, r12_fiq); + + __DECL_REG(x29, sp_fiq); + __DECL_REG(x30, lr_fiq); + + /* Return address and mode */ + __DECL_REG(pc64, pc32); /* ELR_EL2 */ + uint32_t cpsr; /* SPSR_EL2 */ + + union { + uint32_t spsr_el1; /* AArch64 */ + uint32_t spsr_svc; /* AArch32 */ + }; + + /* AArch32 guests only */ + uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt; + + /* AArch64 guests only */ + uint64_t sp_el0; + uint64_t sp_el1, elr_el1; +}; +typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t); + +#undef __DECL_REG + +typedef uint64_t xen_pfn_t; +#define PRI_xen_pfn PRIx64 + +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +/* Only one. All other VCPUS must use VCPUOP_register_vcpu_info */ +#define XEN_LEGACY_MAX_VCPUS 1 + +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong PRIx64 + +struct vcpu_guest_context { +#define _VGCF_online 0 +#define VGCF_online (1<<_VGCF_online) + uint32_t flags; /* VGCF_* */ + + struct vcpu_guest_core_regs user_regs; /* Core CPU registers */ + + uint32_t sctlr, ttbcr; + uint64_t ttbr0, ttbr1; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_vcpu_info { }; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct arch_shared_info { }; +typedef struct arch_shared_info arch_shared_info_t; +typedef uint64_t xen_callback_t; + +#endif /* ifndef __ASSEMBLY __ */ + +/* PSR bits (CPSR, SPSR)*/ + +/* 32 bit modes */ +#define PSR_MODE_USR 0x10 +#define PSR_MODE_FIQ 0x11 +#define PSR_MODE_IRQ 0x12 +#define PSR_MODE_SVC 0x13 +#define PSR_MODE_MON 0x16 +#define PSR_MODE_ABT 0x17 +#define PSR_MODE_HYP 0x1a +#define PSR_MODE_UND 0x1b +#define PSR_MODE_SYS 0x1f + +/* 64 bit modes */ +#ifdef __aarch64__ +#define PSR_MODE_BIT 0x10 /* Set iff AArch32 */ +#define PSR_MODE_EL3h 0x0d +#define PSR_MODE_EL3t 0x0c +#define PSR_MODE_EL2h 0x09 +#define PSR_MODE_EL2t 0x08 +#define PSR_MODE_EL1h 0x05 +#define PSR_MODE_EL1t 0x04 +#define PSR_MODE_EL0t 0x00 +#endif + +#define PSR_THUMB (1<<5) /* Thumb Mode enable */ +#define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ +#define PSR_IRQ_MASK (1<<7) /* Interrupt mask */ +#define PSR_ABT_MASK (1<<8) /* Asynchronous Abort mask */ +#define PSR_BIG_ENDIAN (1<<9) /* Big Endian Mode */ +#define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */ +#define PSR_JAZELLE (1<<24) /* Jazelle Mode */ + +#define PSR_GUEST_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) + +#endif /* __XEN_PUBLIC_ARCH_ARM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/arch-arm/hvm/save.h b/sys/src/9/xen/xen-public/arch-arm/hvm/save.h new file mode 100644 index 000000000..75b8e65bc --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-arm/hvm/save.h @@ -0,0 +1,39 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2012 Citrix Systems Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__ +#define __XEN_PUBLIC_HVM_SAVE_ARM_H__ + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/arch-x86/cpuid.h b/sys/src/9/xen/xen-public/arch-x86/cpuid.h new file mode 100644 index 000000000..d9bd62714 --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/cpuid.h @@ -0,0 +1,68 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir@xen.org> + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* Xen identification leaves start at 0x40000000. */ +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000000) + * EAX: Largest Xen-information leaf. All leaves up to an including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000001) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000002) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff --git a/sys/src/9/xen/xen-public/arch-x86/hvm/save.h b/sys/src/9/xen/xen-public/arch-x86/hvm/save.h new file mode 100644 index 000000000..3664aaf83 --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/hvm/save.h @@ -0,0 +1,600 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ +#define __XEN_PUBLIC_HVM_SAVE_X86_H__ + +/* + * Save/restore header: general info about the save file. + */ + +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 0x00000001 + +struct hvm_save_header { + uint32_t magic; /* Must be HVM_FILE_MAGIC */ + uint32_t version; /* File format version */ + uint64_t changeset; /* Version of Xen that saved this file */ + uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ + uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ +}; + +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); + + +/* + * Processor + * + * Compat: Pre-3.4 didn't have msr_tsc_aux + */ + +struct hvm_hw_cpu { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + uint64_t msr_tsc_aux; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +struct hvm_hw_cpu_compat { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + /*uint64_t msr_tsc_aux; COMPAT */ + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +static inline int _hvm_hw_fix_cpu(void *h) { + + union hvm_hw_cpu_union { + struct hvm_hw_cpu nat; + struct hvm_hw_cpu_compat cmp; + } *ucpu = (union hvm_hw_cpu_union *)h; + + /* If we copy from the end backwards, we should + * be able to do the modification in-place */ + ucpu->nat.error_code = ucpu->cmp.error_code; + ucpu->nat.pending_event = ucpu->cmp.pending_event; + ucpu->nat.tsc = ucpu->cmp.tsc; + ucpu->nat.msr_tsc_aux = 0; + + return 0; +} + +DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ + struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); + +/* + * PIC + */ + +struct hvm_hw_vpic { + /* IR line bitmasks. */ + uint8_t irr; + uint8_t imr; + uint8_t isr; + + /* Line IRx maps to IRQ irq_base+x */ + uint8_t irq_base; + + /* + * Where are we in ICW2-4 initialisation (0 means no init in progress)? + * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). + * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) + * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) + */ + uint8_t init_state:4; + + /* IR line with highest priority. */ + uint8_t priority_add:4; + + /* Reads from A=0 obtain ISR or IRR? */ + uint8_t readsel_isr:1; + + /* Reads perform a polling read? */ + uint8_t poll:1; + + /* Automatically clear IRQs from the ISR during INTA? */ + uint8_t auto_eoi:1; + + /* Automatically rotate IRQ priorities during AEOI? */ + uint8_t rotate_on_auto_eoi:1; + + /* Exclude slave inputs when considering in-service IRQs? */ + uint8_t special_fully_nested_mode:1; + + /* Special mask mode excludes masked IRs from AEOI and priority checks. */ + uint8_t special_mask_mode:1; + + /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */ + uint8_t is_master:1; + + /* Edge/trigger selection. */ + uint8_t elcr; + + /* Virtual INT output. */ + uint8_t int_output; +}; + +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); + + +/* + * IO-APIC + */ + +#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ + +struct hvm_hw_vioapic { + uint64_t base_address; + uint32_t ioregsel; + uint32_t id; + union vioapic_redir_entry + { + uint64_t bits; + struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; + uint8_t reserved[4]; + uint8_t dest_id; + } fields; + } redirtbl[VIOAPIC_NUM_PINS]; +}; + +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); + + +/* + * LAPIC + */ + +struct hvm_hw_lapic { + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; + uint64_t tdt_msr; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); + +struct hvm_hw_lapic_regs { + uint8_t data[1024]; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); + + +/* + * IRQs + */ + +struct hvm_hw_pci_irqs { + /* + * Virtual interrupt wires for a single PCI bus. + * Indexed by: device*4 + INTx#. + */ + union { + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ + uint64_t pad[2]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); + +struct hvm_hw_isa_irqs { + /* + * Virtual interrupt wires for ISA devices. + * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). + */ + union { + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ + uint64_t pad[1]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); + +struct hvm_hw_pci_link { + /* + * PCI-ISA interrupt router. + * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using + * the traditional 'barber's pole' mapping ((device + INTx#) & 3). + * The router provides a programmable mapping from each link to a GSI. + */ + uint8_t route[4]; + uint8_t pad0[4]; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); + +/* + * PIT + */ + +struct hvm_hw_pit { + struct hvm_hw_pit_channel { + uint32_t count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + } channels[3]; /* 3 x 16 bytes */ + uint32_t speaker_data_on; + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); + + +/* + * RTC + */ + +#define RTC_CMOS_SIZE 14 +struct hvm_hw_rtc { + /* CMOS bytes */ + uint8_t cmos_data[RTC_CMOS_SIZE]; + /* Index register for 2-part operations */ + uint8_t cmos_index; + uint8_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); + + +/* + * HPET + */ + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct hvm_hw_hpet { + /* Memory-mapped, software visible registers */ + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t fsb; /* FSB route, not supported now */ + uint64_t res4; /* reserved */ + } timers[HPET_TIMER_NUM]; + uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ + + /* Hidden register state */ + uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ +}; + +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); + + +/* + * PM timer + */ + +struct hvm_hw_pmtimer { + uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ + uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ + uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ +}; + +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); + +/* + * MTRR MSRs + */ + +struct hvm_hw_mtrr { +#define MTRR_VCNT 8 +#define NUM_FIXED_MSR 11 + uint64_t msr_pat_cr; + /* mtrr physbase & physmask msr pair*/ + uint64_t msr_mtrr_var[MTRR_VCNT*2]; + uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; + uint64_t msr_mtrr_cap; + uint64_t msr_mtrr_def_type; +}; + +DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); + +/* + * The save area of XSAVE/XRSTOR. + */ + +struct hvm_hw_cpu_xsave { + uint64_t xfeature_mask; + uint64_t xcr0; /* Updated by XSETBV */ + uint64_t xcr0_accum; /* Updated by XSETBV */ + struct { + struct { char x[512]; } fpu_sse; + + struct { + uint64_t xstate_bv; /* Updated by XRSTOR */ + uint64_t reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[0]; } ymm; /* YMM */ + } save_area; +}; + +#define CPU_XSAVE_CODE 16 + +/* + * Viridian hypervisor context. + */ + +struct hvm_viridian_domain_context { + uint64_t hypercall_gpa; + uint64_t guest_os_id; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); + +struct hvm_viridian_vcpu_context { + uint64_t apic_assist; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); + +struct hvm_vmce_vcpu { + uint64_t caps; + uint64_t mci_ctl2_bank0; + uint64_t mci_ctl2_bank1; +}; + +DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); + +struct hvm_tsc_adjust { + uint64_t tsc_adjust; +}; + +DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); + +/* + * Largest type-code in use + */ +#define HVM_SAVE_CODE_MAX 19 + +#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ diff --git a/sys/src/9/xen/xen-public/arch-x86/xen-mca.h b/sys/src/9/xen/xen-public/arch-x86/xen-mca.h new file mode 100644 index 000000000..04382edee --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/xen-mca.h @@ -0,0 +1,440 @@ +/****************************************************************************** + * arch-x86/mca.h + * + * Contributed by Advanced Micro Devices, Inc. + * Author: Christoph Egger <Christoph.Egger@amd.com> + * + * Guest OS machine check interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* Full MCA functionality has the following Usecases from the guest side: + * + * Must have's: + * 1. Dom0 and DomU register machine check trap callback handlers + * (already done via "set_trap_table" hypercall) + * 2. Dom0 registers machine check event callback handler + * (doable via EVTCHNOP_bind_virq) + * 3. Dom0 and DomU fetches machine check data + * 4. Dom0 wants Xen to notify a DomU + * 5. Dom0 gets DomU ID from physical address + * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") + * + * Nice to have's: + * 7. Dom0 wants Xen to deactivate a physical CPU + * This is better done as separate task, physical CPU hotplugging, + * and hypercall(s) should be sysctl's + * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to + * move a DomU (or Dom0 itself) away from a malicious page + * producing correctable errors. + * 9. offlining physical page: + * Xen free's and never re-uses a certain physical page. + * 10. Testfacility: Allow Dom0 to write values into machine check MSR's + * and tell Xen to trigger a machine check + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ +#define __XEN_PUBLIC_ARCH_X86_MCA_H__ + +/* Hypercall */ +#define __HYPERVISOR_mca __HYPERVISOR_arch_0 + +/* + * The xen-unstable repo has interface version 0x03000001; out interface + * is incompatible with that and any future minor revisions, so we + * choose a different version number range that is numerically less + * than that used in xen-unstable. + */ +#define XEN_MCA_INTERFACE_VERSION 0x01ecc003 + +/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ +#define XEN_MC_NONURGENT 0x0001 +/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */ +#define XEN_MC_URGENT 0x0002 +/* IN: Dom0 acknowledges previosly-fetched telemetry */ +#define XEN_MC_ACK 0x0004 + +/* OUT: All is ok */ +#define XEN_MC_OK 0x0 +/* OUT: Domain could not fetch data. */ +#define XEN_MC_FETCHFAILED 0x1 +/* OUT: There was no machine check data to fetch. */ +#define XEN_MC_NODATA 0x2 +/* OUT: Between notification time and this hypercall an other + * (most likely) correctable error happened. The fetched data, + * does not match the original machine check data. */ +#define XEN_MC_NOMATCH 0x4 + +/* OUT: DomU did not register MC NMI handler. Try something else. */ +#define XEN_MC_CANNOTHANDLE 0x8 +/* OUT: Notifying DomU failed. Retry later or try something else. */ +#define XEN_MC_NOTDELIVERED 0x10 +/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ + + +#ifndef __ASSEMBLY__ + +#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ + +/* + * Machine Check Architecure: + * structs are read-only and used to report all kinds of + * correctable and uncorrectable errors detected by the HW. + * Dom0 and DomU: register a handler to get notified. + * Dom0 only: Correctable errors are reported via VIRQ_MCA + * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers + */ +#define MC_TYPE_GLOBAL 0 +#define MC_TYPE_BANK 1 +#define MC_TYPE_EXTENDED 2 +#define MC_TYPE_RECOVERY 3 + +struct mcinfo_common { + uint16_t type; /* structure type */ + uint16_t size; /* size of this struct in bytes */ +}; + + +#define MC_FLAG_CORRECTABLE (1 << 0) +#define MC_FLAG_UNCORRECTABLE (1 << 1) +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) +#define MC_FLAG_MCE (1 << 6) +/* contains global x86 mc information */ +struct mcinfo_global { + struct mcinfo_common common; + + /* running domain at the time in error (most likely the impacted one) */ + uint16_t mc_domid; + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ + uint32_t mc_socketid; /* physical socket of the physical core */ + uint16_t mc_coreid; /* physical impacted core */ + uint16_t mc_core_threadid; /* core thread of physical core */ + uint32_t mc_apicid; + uint32_t mc_flags; + uint64_t mc_gstatus; /* global status */ +}; + +/* contains bank local x86 mc information */ +struct mcinfo_bank { + struct mcinfo_common common; + + uint16_t mc_bank; /* bank nr */ + uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 + * and if mc_addr is valid. Never valid on DomU. */ + uint64_t mc_status; /* bank status */ + uint64_t mc_addr; /* bank address, only valid + * if addr bit is set in mc_status */ + uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; +}; + + +struct mcinfo_msr { + uint64_t reg; /* MSR */ + uint64_t value; /* MSR value */ +}; + +/* contains mc information from other + * or additional mc MSRs */ +struct mcinfo_extended { + struct mcinfo_common common; + + /* You can fill up to five registers. + * If you need more, then use this structure + * multiple times. */ + + uint32_t mc_msrs; /* Number of msr with valid values. */ + /* + * Currently Intel extended MSR (32/64) include all gp registers + * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be + * useful at present. So expand this array to 16/32 to leave room. + */ + struct mcinfo_msr mc_msr[sizeof(void *) * 4]; +}; + +/* Recovery Action flags. Giving recovery result information to DOM0 */ + +/* Xen takes successful recovery action, the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some case */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* Different Recovery Action types, if the action is performed successfully, + * REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* Below interface used between XEN/DOM0 for passing XEN's recovery action + * information to DOM0. + * usage Senario: After offlining broken page, XEN might pass its page offline + * recovery action result to DOM0. DOM0 will save the information in + * non-volatile memory for further proactive actions, such as offlining the + * easy broken page earlier when doing next reboot. +*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery +{ + struct mcinfo_common common; + uint16_t mc_bank; /* bank nr */ + uint8_t action_flags; + uint8_t action_types; + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; + } action_info; +}; + + +#define MCINFO_HYPERCALLSIZE 1024 +#define MCINFO_MAXSIZE 768 + +#define MCINFO_FLAGS_UNCOMPLETE 0x1 +struct mc_info { + /* Number of mcinfo_* entries in mi_data */ + uint32_t mi_nentries; + uint32_t flags; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; +}; +typedef struct mc_info mc_info_t; +DEFINE_XEN_GUEST_HANDLE(mc_info_t); + +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ + +struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); + + +/* + * OS's should use these instead of writing their own lookup function + * each with its own bugs and drawbacks. + * We use macros instead of static inline functions to allow guests + * to include this header in assembly files (*.S). + */ +/* Prototype: + * uint32_t x86_mcinfo_nentries(struct mc_info *mi); + */ +#define x86_mcinfo_nentries(_mi) \ + (_mi)->mi_nentries +/* Prototype: + * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); + */ +#define x86_mcinfo_first(_mi) \ + ((struct mcinfo_common *)(_mi)->mi_data) +/* Prototype: + * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); + */ +#define x86_mcinfo_next(_mic) \ + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) + +/* Prototype: + * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); + */ +#define x86_mcinfo_lookup(_ret, _mi, _type) \ + do { \ + uint32_t found, i; \ + struct mcinfo_common *_mic; \ + \ + found = 0; \ + (_ret) = NULL; \ + if (_mi == NULL) break; \ + _mic = x86_mcinfo_first(_mi); \ + for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ + if (_mic->type == (_type)) { \ + found = 1; \ + break; \ + } \ + _mic = x86_mcinfo_next(_mic); \ + } \ + (_ret) = found ? _mic : NULL; \ + } while (0) + + +/* Usecase 1 + * Register machine check trap callback handler + * (already done via "set_trap_table" hypercall) + */ + +/* Usecase 2 + * Dom0 registers machine check event callback handler + * done by EVTCHNOP_bind_virq + */ + +/* Usecase 3 + * Fetch machine check data from hypervisor. + * Note, this hypercall is special, because both Dom0 and DomU must use this. + */ +#define XEN_MC_fetch 1 +struct xen_mc_fetch { + /* IN/OUT variables. */ + uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, + XEN_MC_ACK if ack'ing an earlier fetch */ + /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, + XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; + uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ + + /* OUT variables. */ + XEN_GUEST_HANDLE(mc_info_t) data; +}; +typedef struct xen_mc_fetch xen_mc_fetch_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); + + +/* Usecase 4 + * This tells the hypervisor to notify a DomU about the machine check error + */ +#define XEN_MC_notifydomain 2 +struct xen_mc_notifydomain { + /* IN variables. */ + uint16_t mc_domid; /* The unprivileged domain to notify. */ + uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. + * Usually echo'd value from the fetch hypercall. */ + + /* IN/OUT variables. */ + uint32_t flags; + +/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ +/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ +}; +typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); + +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t _pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; + +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ + uint32_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 + +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#define XEN_MC_inject_v2 6 +#define XEN_MC_INJECT_TYPE_MASK 0x7 +#define XEN_MC_INJECT_TYPE_MCE 0x0 +#define XEN_MC_INJECT_TYPE_CMCI 0x1 + +#define XEN_MC_INJECT_CPU_BROADCAST 0x8 + +struct xen_mc_inject_v2 { + uint32_t flags; + struct xenctl_bitmap cpumap; +}; +#endif + +struct xen_mc { + uint32_t cmd; + uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ + union { + struct xen_mc_fetch mc_fetch; + struct xen_mc_notifydomain mc_notifydomain; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; +#if defined(__XEN__) || defined(__XEN_TOOLS__) + struct xen_mc_inject_v2 mc_inject_v2; +#endif + } u; +}; +typedef struct xen_mc xen_mc_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_t); + +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ diff --git a/sys/src/9/xen/xen-public/arch-x86/xen-x86_32.h b/sys/src/9/xen/xen-public/arch-x86/xen-x86_32.h new file mode 100644 index 000000000..15041911e --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_32.h @@ -0,0 +1,171 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +/* Non-PAE bounds are obsolete. */ +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef ___DEFINE_XEN_GUEST_HANDLE +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/arch-x86/xen-x86_64.h b/sys/src/9/xen/xen-public/arch-x86/xen-x86_64.h new file mode 100644 index 000000000..1c4e1590f --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_64.h @@ -0,0 +1,202 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef unsigned long xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/arch-x86/xen.h b/sys/src/9/xen/xen-public/arch-x86/xen.h new file mode 100644 index 000000000..b7f6a517b --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86/xen.h @@ -0,0 +1,260 @@ +/****************************************************************************** + * arch-x86/xen.h + * + * Guest OS interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "../xen.h" + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_H__ + +/* Structural guest handles introduced in 0x00030201. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. + * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but + * they might not be on other architectures. + */ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) + +#if defined(__i386__) +#include "xen-x86_32.h" +#elif defined(__x86_64__) +#include "xen-x86_64.h" +#endif + +#ifndef __ASSEMBLY__ +typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +#endif + +/* + * `incontents 200 segdesc Segment Descriptor Tables + */ +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); + * ` + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + * + * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc); + * ` + * ` @pa The machine physical address of the descriptor to + * ` update. Must be either a descriptor page or writable. + * ` @desc The descriptor value to update, in the same format as a + * ` native descriptor table entry. + */ + +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +#define XEN_LEGACY_MAX_VCPUS 32 + +#ifndef __ASSEMBLY__ + +typedef unsigned long xen_ulong_t; +#define PRI_xen_ulong "lx" + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp); + * ` + * Sets the stack segment and pointer for the current vcpu. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); + * ` + */ +/* + * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +typedef struct trap_info trap_info_t; +DEFINE_XEN_GUEST_HANDLE(trap_info_t); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + }; +#else + unsigned long syscall_callback_eip; +#endif +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + uint64_t pad[32]; +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); + * + * ` unsigned long + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((unsigned long)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/arch-x86_32.h b/sys/src/9/xen/xen-public/arch-x86_32.h new file mode 100644 index 000000000..45842b203 --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86_32.h @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" diff --git a/sys/src/9/xen/xen-public/arch-x86_64.h b/sys/src/9/xen/xen-public/arch-x86_64.h new file mode 100644 index 000000000..409805f35 --- /dev/null +++ b/sys/src/9/xen/xen-public/arch-x86_64.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * arch-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_callbacks(unsigned long event_selector, + * ` unsigned long event_address, + * ` unsigned long failsafe_selector, + * ` unsigned long failsafe_address); + * ` + * Register for callbacks on events. When an event (from an event + * channel) occurs, event_address is used as the value of eip. + * + * A similar callback occurs if the segment selectors are invalid. + * failsafe_address is used as the value of eip. + * + * On x86_64, event_selector and failsafe_selector are ignored (???). + */ diff --git a/sys/src/9/xen/xen-public/callback.h b/sys/src/9/xen/xen-public/callback.h new file mode 100644 index 000000000..8f937880e --- /dev/null +++ b/sys/src/9/xen/xen-public/callback.h @@ -0,0 +1,121 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* x86: Callback for event delivery. */ +#define CALLBACKTYPE_event 0 + +/* x86: Failsafe callback when guest state cannot be restored by Xen. */ +#define CALLBACKTYPE_failsafe 1 + +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ +#define CALLBACKTYPE_syscall 2 + +/* + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel + * feature is enabled. Do not use this callback type in new code. + */ +#define CALLBACKTYPE_sysenter_deprecated 3 + +/* x86: Callback for NMI delivery. */ +#define CALLBACKTYPE_nmi 4 + +/* + * x86: sysenter is only available as follows: + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + * [nb. also 64-bit guest applications on Intel CPUs + * ('64-on-64-on-64'), but syscall is preferred] + */ +#define CALLBACKTYPE_sysenter 5 + +/* + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + */ +#define CALLBACKTYPE_syscall32 7 + +/* + * Disable event deliver during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* + * Register a callback. + */ +#define CALLBACKOP_register 0 +struct callback_register { + uint16_t type; + uint16_t flags; + xen_callback_t address; +}; +typedef struct callback_register callback_register_t; +DEFINE_XEN_GUEST_HANDLE(callback_register_t); + +/* + * Unregister a callback. + * + * Not all callbacks can be unregistered. -EINVAL will be returned if + * you attempt to unregister such a callback. + */ +#define CALLBACKOP_unregister 1 +struct callback_unregister { + uint16_t type; + uint16_t _unused; +}; +typedef struct callback_unregister callback_unregister_t; +DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif + +#endif /* __XEN_PUBLIC_CALLBACK_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/dom0_ops.h b/sys/src/9/xen/xen-public/dom0_ops.h new file mode 100644 index 000000000..c8d764ed6 --- /dev/null +++ b/sys/src/9/xen/xen-public/dom0_ops.h @@ -0,0 +1,120 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOM0_OPS_H__ +#define __XEN_PUBLIC_DOM0_OPS_H__ + +#include "xen.h" +#include "platform.h" + +#if __XEN_INTERFACE_VERSION__ >= 0x00030204 +#error "dom0_ops.h is a compatibility interface only" +#endif + +#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION + +#define DOM0_SETTIME XENPF_settime +#define dom0_settime xenpf_settime +#define dom0_settime_t xenpf_settime_t + +#define DOM0_ADD_MEMTYPE XENPF_add_memtype +#define dom0_add_memtype xenpf_add_memtype +#define dom0_add_memtype_t xenpf_add_memtype_t + +#define DOM0_DEL_MEMTYPE XENPF_del_memtype +#define dom0_del_memtype xenpf_del_memtype +#define dom0_del_memtype_t xenpf_del_memtype_t + +#define DOM0_READ_MEMTYPE XENPF_read_memtype +#define dom0_read_memtype xenpf_read_memtype +#define dom0_read_memtype_t xenpf_read_memtype_t + +#define DOM0_MICROCODE XENPF_microcode_update +#define dom0_microcode xenpf_microcode_update +#define dom0_microcode_t xenpf_microcode_update_t + +#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk +#define dom0_platform_quirk xenpf_platform_quirk +#define dom0_platform_quirk_t xenpf_platform_quirk_t + +typedef uint64_t cpumap_t; + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_MSR 15 +struct dom0_msr { + /* IN variables. */ + uint32_t write; + cpumap_t cpu_mask; + uint32_t msr; + uint32_t in1; + uint32_t in2; + /* OUT variables. */ + uint32_t out1; + uint32_t out2; +}; +typedef struct dom0_msr dom0_msr_t; +DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_PHYSICAL_MEMORY_MAP 40 +struct dom0_memory_map_entry { + uint64_t start, end; + uint32_t flags; /* reserved */ + uint8_t is_ram; +}; +typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; +DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); + +struct dom0_op { + uint32_t cmd; + uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ + union { + struct dom0_msr msr; + struct dom0_settime settime; + struct dom0_add_memtype add_memtype; + struct dom0_del_memtype del_memtype; + struct dom0_read_memtype read_memtype; + struct dom0_microcode microcode; + struct dom0_platform_quirk platform_quirk; + struct dom0_memory_map_entry physical_memory_map; + uint8_t pad[128]; + } u; +}; +typedef struct dom0_op dom0_op_t; +DEFINE_XEN_GUEST_HANDLE(dom0_op_t); + +#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/domctl.h b/sys/src/9/xen/xen-public/domctl.h new file mode 100644 index 000000000..4c5b2bbbb --- /dev/null +++ b/sys/src/9/xen/xen-public/domctl.h @@ -0,0 +1,998 @@ +/****************************************************************************** + * domctl.h + * + * Domain management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOMCTL_H__ +#define __XEN_PUBLIC_DOMCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "domctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "grant_table.h" +#include "hvm/save.h" + +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009 + +/* + * NB. xen_domctl.domain is an IN/OUT parameter for this operation. + * If it is specified as zero, an id is auto-allocated and returned. + */ +/* XEN_DOMCTL_createdomain */ +struct xen_domctl_createdomain { + /* IN parameters */ + uint32_t ssidref; + xen_domain_handle_t handle; + /* Is this an HVM guest (as opposed to a PV guest)? */ +#define _XEN_DOMCTL_CDF_hvm_guest 0 +#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) + /* Use hardware-assisted paging if available? */ +#define _XEN_DOMCTL_CDF_hap 1 +#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) + /* Should domain memory integrity be verifed by tboot during Sx? */ +#define _XEN_DOMCTL_CDF_s3_integrity 2 +#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity) + /* Disable out-of-sync shadow page tables? */ +#define _XEN_DOMCTL_CDF_oos_off 3 +#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) + uint32_t flags; +}; +typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); + +/* XEN_DOMCTL_getdomaininfo */ +struct xen_domctl_getdomaininfo { + /* OUT variables. */ + domid_t domain; /* Also echoed in domctl.domain */ + /* Domain is scheduled to die. */ +#define _XEN_DOMINF_dying 0 +#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying) + /* Domain is an HVM guest (as opposed to a PV guest). */ +#define _XEN_DOMINF_hvm_guest 1 +#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) + /* The guest OS has shut down. */ +#define _XEN_DOMINF_shutdown 2 +#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) + /* Currently paused by control software. */ +#define _XEN_DOMINF_paused 3 +#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) + /* Currently blocked pending an event. */ +#define _XEN_DOMINF_blocked 4 +#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) + /* Domain is currently running. */ +#define _XEN_DOMINF_running 5 +#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) + /* Being debugged. */ +#define _XEN_DOMINF_debugged 6 +#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) + /* XEN_DOMINF_shutdown guest-supplied code. */ +#define XEN_DOMINF_shutdownmask 255 +#define XEN_DOMINF_shutdownshift 16 + uint32_t flags; /* XEN_DOMINF_* */ + uint64_aligned_t tot_pages; + uint64_aligned_t max_pages; + uint64_aligned_t outstanding_pages; + uint64_aligned_t shr_pages; + uint64_aligned_t paged_pages; + uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ + uint64_aligned_t cpu_time; + uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ + uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ + uint32_t ssidref; + xen_domain_handle_t handle; + uint32_t cpupool; +}; +typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); + + +/* XEN_DOMCTL_getmemlist */ +struct xen_domctl_getmemlist { + /* IN variables. */ + /* Max entries to write to output buffer. */ + uint64_aligned_t max_pfns; + /* Start index in guest's page list. */ + uint64_aligned_t start_pfn; + XEN_GUEST_HANDLE_64(uint64) buffer; + /* OUT variables. */ + uint64_aligned_t num_pfns; +}; +typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); + + +/* XEN_DOMCTL_getpageframeinfo */ + +#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 +#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) +#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) +#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) +#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) +#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) +#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) +#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) +#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ +#define XEN_DOMCTL_PFINFO_BROKEN (0xdU<<28) /* broken page */ +#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) + +struct xen_domctl_getpageframeinfo { + /* IN variables. */ + uint64_aligned_t gmfn; /* GMFN to query */ + /* OUT variables. */ + /* Is the page PINNED to a type? */ + uint32_t type; /* see above type defs */ +}; +typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); + + +/* XEN_DOMCTL_getpageframeinfo2 */ +struct xen_domctl_getpageframeinfo2 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(uint32) array; +}; +typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); + +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + + +/* + * Control shadow pagetables operation + */ +/* XEN_DOMCTL_shadow_op */ + +/* Disable shadow mode. */ +#define XEN_DOMCTL_SHADOW_OP_OFF 0 + +/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE 32 + +/* Log-dirty bitmap operations. */ + /* Return the bitmap and clean internal copy for next round. */ +#define XEN_DOMCTL_SHADOW_OP_CLEAN 11 + /* Return the bitmap but do not modify internal copy. */ +#define XEN_DOMCTL_SHADOW_OP_PEEK 12 + +/* Memory allocation accessors. */ +#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 +#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 + +/* Legacy enable operations. */ + /* Equiv. to ENABLE with no mode flags. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 + /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 + /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 + +/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ + /* + * Shadow pagetables are refcounted: guest does not use explicit mmu + * operations nor write-protect its pagetables. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) + /* + * Log pages in a bitmap as they are dirtied. + * Used for live relocation to determine which pages must be re-sent. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) + /* + * Automatically translate GPFNs into MFNs. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) + /* + * Xen does not steal virtual address space from the guest. + * Requires HVM support. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) + +struct xen_domctl_shadow_op_stats { + uint32_t fault_count; + uint32_t dirty_count; +}; +typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); + +struct xen_domctl_shadow_op { + /* IN variables. */ + uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ + + /* OP_ENABLE */ + uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + + /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ + uint32_t mb; /* Shadow memory allocation in MB */ + + /* OP_PEEK / OP_CLEAN */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; + uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ + struct xen_domctl_shadow_op_stats stats; +}; +typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); + + +/* XEN_DOMCTL_max_mem */ +struct xen_domctl_max_mem { + /* IN variables. */ + uint64_aligned_t max_memkb; +}; +typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); + + +/* XEN_DOMCTL_setvcpucontext */ +/* XEN_DOMCTL_getvcpucontext */ +struct xen_domctl_vcpucontext { + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); + + +/* XEN_DOMCTL_getvcpuinfo */ +struct xen_domctl_getvcpuinfo { + /* IN variables. */ + uint32_t vcpu; + /* OUT variables. */ + uint8_t online; /* currently online (not hotplugged)? */ + uint8_t blocked; /* blocked waiting for an event? */ + uint8_t running; /* currently scheduled on its CPU? */ + uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ + uint32_t cpu; /* current mapping */ +}; +typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); + + +/* Get/set the NUMA node(s) with which the guest has affinity with. */ +/* XEN_DOMCTL_setnodeaffinity */ +/* XEN_DOMCTL_getnodeaffinity */ +struct xen_domctl_nodeaffinity { + struct xenctl_bitmap nodemap;/* IN */ +}; +typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t); + + +/* Get/set which physical cpus a vcpu can execute on. */ +/* XEN_DOMCTL_setvcpuaffinity */ +/* XEN_DOMCTL_getvcpuaffinity */ +struct xen_domctl_vcpuaffinity { + uint32_t vcpu; /* IN */ + struct xenctl_bitmap cpumap; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); + + +/* XEN_DOMCTL_max_vcpus */ +struct xen_domctl_max_vcpus { + uint32_t max; /* maximum number of vcpus */ +}; +typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); + + +/* XEN_DOMCTL_scheduler_op */ +/* Scheduler types. */ +#define XEN_SCHEDULER_SEDF 4 +#define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_CREDIT2 6 +#define XEN_SCHEDULER_ARINC653 7 +/* Set or get info? */ +#define XEN_DOMCTL_SCHEDOP_putinfo 0 +#define XEN_DOMCTL_SCHEDOP_getinfo 1 +struct xen_domctl_scheduler_op { + uint32_t sched_id; /* XEN_SCHEDULER_* */ + uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ + union { + struct xen_domctl_sched_sedf { + uint64_aligned_t period; + uint64_aligned_t slice; + uint64_aligned_t latency; + uint32_t extratime; + uint32_t weight; + } sedf; + struct xen_domctl_sched_credit { + uint16_t weight; + uint16_t cap; + } credit; + struct xen_domctl_sched_credit2 { + uint16_t weight; + } credit2; + } u; +}; +typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); + + +/* XEN_DOMCTL_setdomainhandle */ +struct xen_domctl_setdomainhandle { + xen_domain_handle_t handle; +}; +typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); + + +/* XEN_DOMCTL_setdebugging */ +struct xen_domctl_setdebugging { + uint8_t enable; +}; +typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); + + +/* XEN_DOMCTL_irq_permission */ +struct xen_domctl_irq_permission { + uint8_t pirq; + uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ +}; +typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); + + +/* XEN_DOMCTL_iomem_permission */ +struct xen_domctl_iomem_permission { + uint64_aligned_t first_mfn;/* first page (physical page number) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint8_t allow_access; /* allow (!0) or deny (0) access to range? */ +}; +typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); + + +/* XEN_DOMCTL_ioport_permission */ +struct xen_domctl_ioport_permission { + uint32_t first_port; /* first port int range */ + uint32_t nr_ports; /* size of port range */ + uint8_t allow_access; /* allow or deny access to range? */ +}; +typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); + + +/* XEN_DOMCTL_hypercall_init */ +struct xen_domctl_hypercall_init { + uint64_aligned_t gmfn; /* GMFN to be initialised */ +}; +typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); + + +/* XEN_DOMCTL_arch_setup */ +#define _XEN_DOMAINSETUP_hvm_guest 0 +#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) +#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ +#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) +#define _XEN_DOMAINSETUP_sioemu_guest 2 +#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest) +typedef struct xen_domctl_arch_setup { + uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ +} xen_domctl_arch_setup_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); + + +/* XEN_DOMCTL_settimeoffset */ +struct xen_domctl_settimeoffset { + int32_t time_offset_seconds; /* applied to domain wallclock time */ +}; +typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); + +/* XEN_DOMCTL_gethvmcontext */ +/* XEN_DOMCTL_sethvmcontext */ +typedef struct xen_domctl_hvmcontext { + uint32_t size; /* IN/OUT: size of buffer / bytes filled */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size req'd */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); + + +/* XEN_DOMCTL_set_address_size */ +/* XEN_DOMCTL_get_address_size */ +typedef struct xen_domctl_address_size { + uint32_t size; +} xen_domctl_address_size_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); + + +/* XEN_DOMCTL_real_mode_area */ +struct xen_domctl_real_mode_area { + uint32_t log; /* log2 of Real Mode Area size */ +}; +typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + + +/* XEN_DOMCTL_sendtrigger */ +#define XEN_DOMCTL_SENDTRIGGER_NMI 0 +#define XEN_DOMCTL_SENDTRIGGER_RESET 1 +#define XEN_DOMCTL_SENDTRIGGER_INIT 2 +#define XEN_DOMCTL_SENDTRIGGER_POWER 3 +#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4 +struct xen_domctl_sendtrigger { + uint32_t trigger; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); + + +/* Assign PCI device to HVM guest. Sets up IOMMU structures. */ +/* XEN_DOMCTL_assign_device */ +/* XEN_DOMCTL_test_assign_device */ +/* XEN_DOMCTL_deassign_device */ +struct xen_domctl_assign_device { + uint32_t machine_sbdf; /* machine PCI ID of assigned device */ +}; +typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); + +/* Retrieve sibling devices infomation of machine_sbdf */ +/* XEN_DOMCTL_get_device_group */ +struct xen_domctl_get_device_group { + uint32_t machine_sbdf; /* IN */ + uint32_t max_sdevs; /* IN */ + uint32_t num_sdevs; /* OUT */ + XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ +}; +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); + +/* Pass-through interrupts: bind real irq -> hvm devfn. */ +/* XEN_DOMCTL_bind_pt_irq */ +/* XEN_DOMCTL_unbind_pt_irq */ +typedef enum pt_irq_type_e { + PT_IRQ_TYPE_PCI, + PT_IRQ_TYPE_ISA, + PT_IRQ_TYPE_MSI, + PT_IRQ_TYPE_MSI_TRANSLATE, +} pt_irq_type_t; +struct xen_domctl_bind_pt_irq { + uint32_t machine_irq; + pt_irq_type_t irq_type; + uint32_t hvm_domid; + + union { + struct { + uint8_t isa_irq; + } isa; + struct { + uint8_t bus; + uint8_t device; + uint8_t intx; + } pci; + struct { + uint8_t gvec; + uint32_t gflags; + uint64_aligned_t gtable; + } msi; + } u; +}; +typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); + + +/* Bind machine I/O address range -> HVM address range. */ +/* XEN_DOMCTL_memory_mapping */ +#define DPCI_ADD_MAPPING 1 +#define DPCI_REMOVE_MAPPING 0 +struct xen_domctl_memory_mapping { + uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ + uint64_aligned_t first_mfn; /* first page (machine page) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint32_t add_mapping; /* add or remove mapping */ + uint32_t padding; /* padding for 64-bit aligned structure */ +}; +typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); + + +/* Bind machine I/O port range -> HVM I/O port range. */ +/* XEN_DOMCTL_ioport_mapping */ +struct xen_domctl_ioport_mapping { + uint32_t first_gport; /* first guest IO port*/ + uint32_t first_mport; /* first machine IO port */ + uint32_t nr_ports; /* size of port range */ + uint32_t add_mapping; /* add or remove mapping */ +}; +typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); + + +/* + * Pin caching type of RAM space for x86 HVM domU. + */ +/* XEN_DOMCTL_pin_mem_cacheattr */ +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 +#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 +#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 +#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 +#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 +#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +struct xen_domctl_pin_mem_cacheattr { + uint64_aligned_t start, end; + uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ +}; +typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); + + +/* XEN_DOMCTL_set_ext_vcpucontext */ +/* XEN_DOMCTL_get_ext_vcpucontext */ +struct xen_domctl_ext_vcpucontext { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: Size of struct (IN) + * GET: Size of struct (OUT, up to 128 bytes) + */ + uint32_t size; +#if defined(__i386__) || defined(__x86_64__) + /* SYSCALL from 32-bit mode and SYSENTER callback information. */ + /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ + uint64_aligned_t syscall32_callback_eip; + uint64_aligned_t sysenter_callback_eip; + uint16_t syscall32_callback_cs; + uint16_t sysenter_callback_cs; + uint8_t syscall32_disables_events; + uint8_t sysenter_disables_events; +#if defined(__GNUC__) + union { + uint64_aligned_t mcg_cap; + struct hvm_vmce_vcpu vmce; + }; +#else + struct hvm_vmce_vcpu vmce; +#endif +#endif +}; +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); + +/* + * Set the target domain for a domain + */ +/* XEN_DOMCTL_set_target */ +struct xen_domctl_set_target { + domid_t target; +}; +typedef struct xen_domctl_set_target xen_domctl_set_target_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); + +#if defined(__i386__) || defined(__x86_64__) +# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF +/* XEN_DOMCTL_set_cpuid */ +struct xen_domctl_cpuid { + uint32_t input[2]; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); +#endif + +/* XEN_DOMCTL_subscribe */ +struct xen_domctl_subscribe { + uint32_t port; /* IN */ +}; +typedef struct xen_domctl_subscribe xen_domctl_subscribe_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t); + +/* + * Define the maximum machine address size which should be allocated + * to a guest. + */ +/* XEN_DOMCTL_set_machine_address_size */ +/* XEN_DOMCTL_get_machine_address_size */ + +/* + * Do not inject spurious page faults into this domain. + */ +/* XEN_DOMCTL_suppress_spurious_page_faults */ + +/* XEN_DOMCTL_debug_op */ +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 +struct xen_domctl_debug_op { + uint32_t op; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); + +/* + * Request a particular record from the HVM context + */ +/* XEN_DOMCTL_gethvmcontext_partial */ +typedef struct xen_domctl_hvmcontext_partial { + uint32_t type; /* IN: Type of record required */ + uint32_t instance; /* IN: Instance of that type */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ +} xen_domctl_hvmcontext_partial_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); + +/* XEN_DOMCTL_disable_migrate */ +typedef struct xen_domctl_disable_migrate { + uint32_t disable; /* IN: 1: disable migration and restore */ +} xen_domctl_disable_migrate_t; + + +/* XEN_DOMCTL_gettscinfo */ +/* XEN_DOMCTL_settscinfo */ +struct xen_guest_tsc_info { + uint32_t tsc_mode; + uint32_t gtsc_khz; + uint32_t incarnation; + uint32_t pad; + uint64_aligned_t elapsed_nsec; +}; +typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); +typedef struct xen_domctl_tsc_info { + XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ + xen_guest_tsc_info_t info; /* IN */ +} xen_domctl_tsc_info_t; + +/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ +struct xen_domctl_gdbsx_memio { + /* IN */ + uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ + uint64_aligned_t gva; /* guest virtual address */ + uint64_aligned_t uva; /* user buffer virtual address */ + uint32_t len; /* number of bytes to read/write */ + uint8_t gwr; /* 0 = read from guest. 1 = write to guest */ + /* OUT */ + uint32_t remain; /* bytes remaining to be copied */ +}; + +/* XEN_DOMCTL_gdbsx_pausevcpu */ +/* XEN_DOMCTL_gdbsx_unpausevcpu */ +struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ + uint32_t vcpu; /* which vcpu */ +}; + +/* XEN_DOMCTL_gdbsx_domstatus */ +struct xen_domctl_gdbsx_domstatus { + /* OUT */ + uint8_t paused; /* is the domain paused */ + uint32_t vcpu_id; /* any vcpu in an event? */ + uint32_t vcpu_ev; /* if yes, what event? */ +}; + +/* + * Memory event operations + */ + +/* XEN_DOMCTL_mem_event_op */ + +/* + * Domain memory paging + * Page memory in and out. + * Domctl interface to set up and tear down the + * pager<->hypervisor interface. Use XENMEM_paging_op* + * to perform per-page operations. + * + * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several + * non-standard error codes to indicate why paging could not be enabled: + * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest + * EMLINK - guest has iommu passthrough enabled + * EXDEV - guest has PoD enabled + * EBUSY - guest has or had paging enabled, ring buffer still active + */ +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 + +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 + +/* + * Access permissions. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * There are HVM hypercalls to set the per-page access permissions of every + * page in a domain. When one of these permissions--independent, read, + * write, and execute--is violated, the VCPU is paused and a memory event + * is sent with what happened. (See public/mem_event.h) . + * + * The memory event handler can then resume the VCPU and redo the access + * with a XENMEM_access_op_resume hypercall. + * + * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several + * non-standard error codes to indicate why access could not be enabled: + * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest + * EBUSY - guest has or had access enabled, ring buffer still active + */ +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 + +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 + +/* + * Sharing ENOMEM helper. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * If setup, this ring is used to communicate failed allocations + * in the unshare path. XENMEM_sharing_op_resume is used to wake up + * vcpus that could not unshare. + * + * Note that shring can be turned on (as per the domctl below) + * *without* this ring being setup. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 + +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1 + +/* Use for teardown/setup of helper<->hypervisor interface for paging, + * access and sharing.*/ +struct xen_domctl_mem_event_op { + uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ + uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + + uint32_t port; /* OUT: event channel for ring */ +}; +typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); + +/* + * Memory sharing operations + */ +/* XEN_DOMCTL_mem_sharing_op. + * The CONTROL sub-domctl is used for bringup/teardown. */ +#define XEN_DOMCTL_MEM_SHARING_CONTROL 0 + +struct xen_domctl_mem_sharing_op { + uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ + + union { + uint8_t enable; /* CONTROL */ + } u; +}; +typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); + +struct xen_domctl_audit_p2m { + /* OUT error counts */ + uint64_t orphans; + uint64_t m2p_bad; + uint64_t p2m_bad; +}; +typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t); + +struct xen_domctl_set_virq_handler { + uint32_t virq; /* IN */ +}; +typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t); + +#if defined(__i386__) || defined(__x86_64__) +/* XEN_DOMCTL_setvcpuextstate */ +/* XEN_DOMCTL_getvcpuextstate */ +struct xen_domctl_vcpuextstate { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: xfeature support mask of struct (IN) + * GET: xfeature support mask of struct (IN/OUT) + * xfeature mask is served as identifications of the saving format + * so that compatible CPUs can have a check on format to decide + * whether it can restore. + */ + uint64_aligned_t xfeature_mask; + /* + * SET: Size of struct (IN) + * GET: Size of struct (IN/OUT) + */ + uint64_aligned_t size; + XEN_GUEST_HANDLE_64(uint64) buffer; +}; +typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); +#endif + +/* XEN_DOMCTL_set_access_required: sets whether a memory event listener + * must be present to handle page access events: if false, the page + * access will revert to full permissions if no one is listening; + * */ +struct xen_domctl_set_access_required { + uint8_t access_required; +}; +typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); + +struct xen_domctl_set_broken_page_p2m { + uint64_aligned_t pfn; +}; +typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); + +struct xen_domctl { + uint32_t cmd; +#define XEN_DOMCTL_createdomain 1 +#define XEN_DOMCTL_destroydomain 2 +#define XEN_DOMCTL_pausedomain 3 +#define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_getdomaininfo 5 +#define XEN_DOMCTL_getmemlist 6 +#define XEN_DOMCTL_getpageframeinfo 7 +#define XEN_DOMCTL_getpageframeinfo2 8 +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_shadow_op 10 +#define XEN_DOMCTL_max_mem 11 +#define XEN_DOMCTL_setvcpucontext 12 +#define XEN_DOMCTL_getvcpucontext 13 +#define XEN_DOMCTL_getvcpuinfo 14 +#define XEN_DOMCTL_max_vcpus 15 +#define XEN_DOMCTL_scheduler_op 16 +#define XEN_DOMCTL_setdomainhandle 17 +#define XEN_DOMCTL_setdebugging 18 +#define XEN_DOMCTL_irq_permission 19 +#define XEN_DOMCTL_iomem_permission 20 +#define XEN_DOMCTL_ioport_permission 21 +#define XEN_DOMCTL_hypercall_init 22 +#define XEN_DOMCTL_arch_setup 23 +#define XEN_DOMCTL_settimeoffset 24 +#define XEN_DOMCTL_getvcpuaffinity 25 +#define XEN_DOMCTL_real_mode_area 26 +#define XEN_DOMCTL_resumedomain 27 +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_subscribe 29 +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +#define XEN_DOMCTL_assign_device 37 +#define XEN_DOMCTL_bind_pt_irq 38 +#define XEN_DOMCTL_memory_mapping 39 +#define XEN_DOMCTL_ioport_mapping 40 +#define XEN_DOMCTL_pin_mem_cacheattr 41 +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +#define XEN_DOMCTL_set_opt_feature 44 /* Obsolete IA64 only */ +#define XEN_DOMCTL_test_assign_device 45 +#define XEN_DOMCTL_set_target 46 +#define XEN_DOMCTL_deassign_device 47 +#define XEN_DOMCTL_unbind_pt_irq 48 +#define XEN_DOMCTL_set_cpuid 49 +#define XEN_DOMCTL_get_device_group 50 +#define XEN_DOMCTL_set_machine_address_size 51 +#define XEN_DOMCTL_get_machine_address_size 52 +#define XEN_DOMCTL_suppress_spurious_page_faults 53 +#define XEN_DOMCTL_debug_op 54 +#define XEN_DOMCTL_gethvmcontext_partial 55 +#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_mem_sharing_op 57 +#define XEN_DOMCTL_disable_migrate 58 +#define XEN_DOMCTL_gettscinfo 59 +#define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 +#define XEN_DOMCTL_setvcpuextstate 62 +#define XEN_DOMCTL_getvcpuextstate 63 +#define XEN_DOMCTL_set_access_required 64 +#define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_set_broken_page_p2m 67 +#define XEN_DOMCTL_setnodeaffinity 68 +#define XEN_DOMCTL_getnodeaffinity 69 +#define XEN_DOMCTL_gdbsx_guestmemio 1000 +#define XEN_DOMCTL_gdbsx_pausevcpu 1001 +#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 +#define XEN_DOMCTL_gdbsx_domstatus 1003 + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct xen_domctl_createdomain createdomain; + struct xen_domctl_getdomaininfo getdomaininfo; + struct xen_domctl_getmemlist getmemlist; + struct xen_domctl_getpageframeinfo getpageframeinfo; + struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; + struct xen_domctl_nodeaffinity nodeaffinity; + struct xen_domctl_vcpuaffinity vcpuaffinity; + struct xen_domctl_shadow_op shadow_op; + struct xen_domctl_max_mem max_mem; + struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; + struct xen_domctl_max_vcpus max_vcpus; + struct xen_domctl_scheduler_op scheduler_op; + struct xen_domctl_setdomainhandle setdomainhandle; + struct xen_domctl_setdebugging setdebugging; + struct xen_domctl_irq_permission irq_permission; + struct xen_domctl_iomem_permission iomem_permission; + struct xen_domctl_ioport_permission ioport_permission; + struct xen_domctl_hypercall_init hypercall_init; + struct xen_domctl_arch_setup arch_setup; + struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_disable_migrate disable_migrate; + struct xen_domctl_tsc_info tsc_info; + struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; + struct xen_domctl_assign_device assign_device; + struct xen_domctl_bind_pt_irq bind_pt_irq; + struct xen_domctl_memory_mapping memory_mapping; + struct xen_domctl_ioport_mapping ioport_mapping; + struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; + struct xen_domctl_ext_vcpucontext ext_vcpucontext; + struct xen_domctl_set_target set_target; + struct xen_domctl_subscribe subscribe; + struct xen_domctl_debug_op debug_op; + struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_mem_sharing_op mem_sharing_op; +#if defined(__i386__) || defined(__x86_64__) + struct xen_domctl_cpuid cpuid; + struct xen_domctl_vcpuextstate vcpuextstate; +#endif + struct xen_domctl_set_access_required access_required; + struct xen_domctl_audit_p2m audit_p2m; + struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; + struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; + struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; + uint8_t pad[128]; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); + +#endif /* __XEN_PUBLIC_DOMCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/elfnote.h b/sys/src/9/xen/xen-public/elfnote.h new file mode 100644 index 000000000..8566a7e5c --- /dev/null +++ b/sys/src/9/xen/xen-public/elfnote.h @@ -0,0 +1,271 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicated the fields in the legacy __xen_guest string which + * this a note type replaces. + * + * String values (for non-legacy) are NULL terminated ASCII, also known + * as ASCIZ type. + */ + +/* + * NAME=VALUE pair (string). + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). + * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the acutal required + * psuedo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field. This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[entended-cr3]" in the __xen_guest interface. + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters. If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence + * of this string as a boolean flag rather than requiring "yes" or + * "no". + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + * This is a numeric value. + * + * Default is 0 + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about. The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/event_channel.h b/sys/src/9/xen/xen-public/event_channel.h new file mode 100644 index 000000000..472efdba1 --- /dev/null +++ b/sys/src/9/xen/xen-public/event_channel.h @@ -0,0 +1,294 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include "xen.h" + +/* + * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information, the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). + */ + +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +/* ` } */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <rdom> may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; /* enum virq */ + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>). + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superceded by new event_channel_op() hypercall since 0x00030202. + */ +struct evtchn_op { + uint32_t cmd; /* enum event_channel_op */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/features.h b/sys/src/9/xen/xen-public/features.h new file mode 100644 index 000000000..a149aa650 --- /dev/null +++ b/sys/src/9/xen/xen-public/features.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES + * + * The list of all the features the guest supports. They are set by + * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES + * string. The format is the feature names (as given here without the + * "XENFEAT_" prefix) separated by '|' characters. + * If a feature is required for the kernel to function then the feature name + * must be preceded by a '!' character. + * + * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then in the + * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0, + */ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space are handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + +/* x86: Does this Xen host support the HVM callback vector type? */ +#define XENFEAT_hvm_callback_vector 8 + +/* x86: pvclock algorithm is safe to use on HVM */ +#define XENFEAT_hvm_safe_pvclock 9 + +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + +/* operation as Dom0 is supported */ +#define XENFEAT_dom0 11 + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/gcov.h b/sys/src/9/xen/xen-public/gcov.h new file mode 100644 index 000000000..1b29b48a7 --- /dev/null +++ b/sys/src/9/xen/xen-public/gcov.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * gcov.h + * + * Coverage structures exported by Xen. + * Structure is different from Gcc one. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2013, Citrix Systems R&D Ltd. + */ + +#ifndef __XEN_PUBLIC_GCOV_H__ +#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__ + +#define XENCOV_COUNTERS 5 +#define XENCOV_TAG_BASE 0x58544300u +#define XENCOV_TAG_FILE (XENCOV_TAG_BASE+0x46u) +#define XENCOV_TAG_FUNC (XENCOV_TAG_BASE+0x66u) +#define XENCOV_TAG_COUNTER(n) (XENCOV_TAG_BASE+0x30u+((n)&0xfu)) +#define XENCOV_TAG_END (XENCOV_TAG_BASE+0x2eu) +#define XENCOV_IS_TAG_COUNTER(n) \ + ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS)) +#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0)) + +/* + * The main structure for the blob is + * BLOB := FILE.. END + * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS + * FILENAME := LEN characters + * characters are padded to 32 bit + * LEN := 32 bit value + * COUNTERS := TAG_COUNTER(n) NUM COUNTER.. + * NUM := 32 bit valie + * COUNTER := 64 bit value + * FUNCTIONS := TAG_FUNC NUM FUNCTION.. + * FUNCTION := IDENT CHECKSUM NUM_COUNTERS + * + * All tagged structures are aligned to 8 bytes + */ + +/** + * File information + * Prefixed with XENCOV_TAG_FILE and a string with filename + * Aligned to 8 bytes + */ +struct xencov_file +{ + uint32_t tag; /* XENCOV_TAG_FILE */ + uint32_t version; + uint32_t stamp; + uint32_t fn_len; + char filename[1]; +}; + + +/** + * Counters information + * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1) + * Aligned to 8 bytes + */ +struct xencov_counter +{ + uint32_t tag; /* XENCOV_TAG_COUNTER(n) */ + uint32_t num; + uint64_t values[1]; +}; + +/** + * Information for each function + * Number of counter is equal to the number of counter structures got before + */ +struct xencov_function +{ + uint32_t ident; + uint32_t checksum; + uint32_t num_counters[1]; +}; + +/** + * Information for all functions + * Aligned to 8 bytes + */ +struct xencov_functions +{ + uint32_t tag; /* XENCOV_TAG_FUNC */ + uint32_t num; + struct xencov_function xencov_function[1]; +}; + +/** + * Terminator + */ +struct xencov_end +{ + uint32_t tag; /* XENCOV_TAG_END */ +}; + +#endif /* __XEN_PUBLIC_GCOV_H__ */ + diff --git a/sys/src/9/xen/xen-public/grant_table.h b/sys/src/9/xen/xen-public/grant_table.h new file mode 100644 index 000000000..b8a3d6ceb --- /dev/null +++ b/sys/src/9/xen/xen-public/grant_table.h @@ -0,0 +1,662 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include "xen.h" + +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism to memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granter’s memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* ` enum neg_errnoval + * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, + * ` void *args, + * ` unsigned int count) + * ` + * + * @args points to an array of a per-command data structure. The array + * has @count members + */ + +/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ +#define GNTTABOP_map_grant_ref 0 +#define GNTTABOP_unmap_grant_ref 1 +#define GNTTABOP_setup_table 2 +#define GNTTABOP_dump_table 3 +#define GNTTABOP_transfer 4 +#define GNTTABOP_copy 5 +#define GNTTABOP_query_size 6 +#define GNTTABOP_unmap_and_replace 7 +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +#define GNTTABOP_set_version 8 +#define GNTTABOP_get_status_frames 9 +#define GNTTABOP_get_version 10 +#define GNTTABOP_swap_grant_ref 11 +#endif /* __XEN_INTERFACE_VERSION__ */ +/* ` } */ + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, <handle> + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle> + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + XEN_GUEST_HANDLE(ulong) frame_list; +#else + XEN_GUEST_HANDLE(xen_pfn_t) frame_list; +#endif +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); + +/* + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <domid, ref>. + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be eithers MFNs or, for foreign domains, + * grant references. the foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * grant appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_copy gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by <handle> but atomically replace the page table entry with one + * pointing to the machine address under <new_addr>. <new_addr> will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * <nr_frames> specify the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain <dom>. + */ +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +/* + * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. + */ +struct gnttab_swap_grant_ref { + /* IN parameters */ + grant_ref_t ref_a; + grant_ref_t ref_b; + /* OUT parameters */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine addess of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + +/* + * Values for error status returns. All errors are -ve. + */ +/* ` enum grant_status { */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. */ +/* ` } */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "operation not done; try again" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/hvm/e820.h b/sys/src/9/xen/xen-public/hvm/e820.h new file mode 100644 index 000000000..5bdc22741 --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/e820.h @@ -0,0 +1,34 @@ + +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_E820_H__ +#define __XEN_PUBLIC_HVM_E820_H__ + +/* E820 location in HVM virtual address space. */ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + +#endif /* __XEN_PUBLIC_HVM_E820_H__ */ diff --git a/sys/src/9/xen/xen-public/hvm/hvm_info_table.h b/sys/src/9/xen/xen-public/hvm/hvm_info_table.h new file mode 100644 index 000000000..36085fa5c --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/hvm_info_table.h @@ -0,0 +1,72 @@ +/****************************************************************************** + * hvm/hvm_info_table.h + * + * HVM parameter and information table, written into guest memory map. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ +#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ + +#define HVM_INFO_PFN 0x09F +#define HVM_INFO_OFFSET 0x800 +#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) + +/* Maximum we can support with current vLAPIC ID mapping. */ +#define HVM_MAX_VCPUS 128 + +struct hvm_info_table { + char signature[8]; /* "HVM INFO" */ + uint32_t length; + uint8_t checksum; + + /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ + uint8_t apic_mode; + + /* How many CPUs does this domain have? */ + uint32_t nr_vcpus; + + /* + * MEMORY MAP provided by HVM domain builder. + * Notes: + * 1. page_to_phys(x) = x << 12 + * 2. If a field is zero, the corresponding range does not exist. + */ + /* + * 0x0 to page_to_phys(low_mem_pgend)-1: + * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) + */ + uint32_t low_mem_pgend; + /* + * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: + * Reserved for special memory mappings + */ + uint32_t reserved_mem_pgstart; + /* + * 0x100000000 to page_to_phys(high_mem_pgend)-1: + * RAM above 4GB + */ + uint32_t high_mem_pgend; + + /* Bitmap of which CPUs are online at boot time. */ + uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; +}; + +#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ diff --git a/sys/src/9/xen/xen-public/hvm/hvm_op.h b/sys/src/9/xen/xen-public/hvm/hvm_op.h new file mode 100644 index 000000000..a9aab4bc3 --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/hvm_op.h @@ -0,0 +1,275 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +#include "../xen.h" +#include "../trace.h" + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Track dirty VRAM. */ +#define HVMOP_track_dirty_vram 6 +struct xen_hvm_track_dirty_vram { + /* Domain to be tracked. */ + domid_t domid; + /* First pfn to track. */ + uint64_aligned_t first_pfn; + /* Number of pages to track. */ + uint64_aligned_t nr; + /* OUT variable. */ + /* Dirty bitmap buffer. */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; +}; +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); + +/* Notify that some pages got modified by the Device Model. */ +#define HVMOP_modified_memory 7 +struct xen_hvm_modified_memory { + /* Domain to be updated. */ + domid_t domid; + /* First pfn. */ + uint64_aligned_t first_pfn; + /* Number of pages. */ + uint64_aligned_t nr; +}; +typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); + +#define HVMOP_set_mem_type 8 +/* Notify that a region of memory is to be treated in a specific way. */ +struct xen_hvm_set_mem_type { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_type; + /* Number of pages. */ + uint32_t nr; + /* First pfn. */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + uint16_t pad[3]; /* align next field on 8-byte boundary */ + /* guest physical address of the toplevel pagetable dying */ + uint64_t gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); + +/* Get the current Xen time, in nanoseconds since system boot. */ +#define HVMOP_get_time 10 +struct xen_hvm_get_time { + uint64_t now; /* OUT */ +}; +typedef struct xen_hvm_get_time xen_hvm_get_time_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); + +#define HVMOP_xentrace 11 +struct xen_hvm_xentrace { + uint16_t event, extra_bytes; + uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; +}; +typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#define HVMOP_set_mem_access 12 +typedef enum { + HVMMEM_access_n, + HVMMEM_access_r, + HVMMEM_access_w, + HVMMEM_access_rw, + HVMMEM_access_x, + HVMMEM_access_rx, + HVMMEM_access_wx, + HVMMEM_access_rwx, + HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically + * change to r-w on a write */ + HVMMEM_access_n2rwx, /* Log access: starts off as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu */ + HVMMEM_access_default /* Take the domain default */ +} hvmmem_access_t; +/* Notify that a region of memory is to have specific access types */ +struct xen_hvm_set_mem_access { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* Number of pages, ignored on setting default access */ + uint32_t nr; + /* First pfn, or ~0ull to set the default access for new pages */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t); + +#define HVMOP_get_mem_access 13 +/* Get the specific access type for that region of memory */ +struct xen_hvm_get_mem_access { + /* Domain to be queried. */ + domid_t domid; + /* Memory type: OUT */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* pfn, or ~0ull for default access for new pages. IN */ + uint64_aligned_t pfn; +}; +typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t); + +#define HVMOP_inject_trap 14 +/* Inject a trap into a VCPU, which will get taken up on the next + * scheduling of it. Note that the caller should know enough of the + * state of the CPU before injecting, to know what the effect of + * injecting the trap will be. + */ +struct xen_hvm_inject_trap { + /* Domain to be queried. */ + domid_t domid; + /* VCPU */ + uint32_t vcpuid; + /* Vector number */ + uint32_t vector; + /* Trap type (HVMOP_TRAP_*) */ + uint32_t type; +/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */ +# define HVMOP_TRAP_ext_int 0 /* external interrupt */ +# define HVMOP_TRAP_nmi 2 /* nmi */ +# define HVMOP_TRAP_hw_exc 3 /* hardware exception */ +# define HVMOP_TRAP_sw_int 4 /* software interrupt (CD nn) */ +# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */ +# define HVMOP_TRAP_sw_exc 6 /* INT3 (CC), INTO (CE) */ + /* Error code, or ~0u to skip */ + uint32_t error_code; + /* Intruction length */ + uint32_t insn_len; + /* CR2 for page faults */ + uint64_aligned_t cr2; +}; +typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* MSI injection for emulated devices */ +#define HVMOP_inject_msi 16 +struct xen_hvm_inject_msi { + /* Domain to be injected */ + domid_t domid; + /* Data -- lower 32 bits */ + uint32_t data; + /* Address (0xfeexxxxx) */ + uint64_t addr; +}; +typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/sys/src/9/xen/xen-public/hvm/hvm_xs_strings.h b/sys/src/9/xen/xen-public/hvm/hvm_xs_strings.h new file mode 100644 index 000000000..8aec93511 --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/hvm_xs_strings.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * hvm/hvm_xs_strings.h + * + * HVM xenstore strings used in HVMLOADER. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ +#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ + +#define HVM_XS_HVMLOADER "hvmloader" +#define HVM_XS_BIOS "hvmloader/bios" +#define HVM_XS_GENERATION_ID_ADDRESS "hvmloader/generation-id-address" +#define HVM_XS_ALLOW_MEMORY_RELOCATE "hvmloader/allow-memory-relocate" + +/* The following values allow additional ACPI tables to be added to the + * virtual ACPI BIOS that hvmloader constructs. The values specify the guest + * physical address and length of a block of ACPI tables to add. The format of + * the block is simply concatenated raw tables (which specify their own length + * in the ACPI header). + */ +#define HVM_XS_ACPI_PT_ADDRESS "hvmloader/acpi/address" +#define HVM_XS_ACPI_PT_LENGTH "hvmloader/acpi/length" + +/* Any number of SMBIOS types can be passed through to an HVM guest using + * the following xenstore values. The values specify the guest physical + * address and length of a block of SMBIOS structures for hvmloader to use. + * The block is formatted in the following way: + * + * <length><struct><length><struct>... + * + * Each length separator is a 32b integer indicating the length of the next + * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct + * will replace the default structure in hvmloader. In addition, any + * OEM/vendortypes (128 - 255) will all be added. + */ +#define HVM_XS_SMBIOS_PT_ADDRESS "hvmloader/smbios/address" +#define HVM_XS_SMBIOS_PT_LENGTH "hvmloader/smbios/length" + +/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */ +#define HVM_XS_SMBIOS_DEFAULT_BATTERY "hvmloader/smbios/default_battery" + +/* The following xenstore values are used to override some of the default + * string values in the SMBIOS table constructed in hvmloader. + */ +#define HVM_XS_BIOS_STRINGS "bios-strings" +#define HVM_XS_BIOS_VENDOR "bios-strings/bios-vendor" +#define HVM_XS_BIOS_VERSION "bios-strings/bios-version" +#define HVM_XS_SYSTEM_MANUFACTURER "bios-strings/system-manufacturer" +#define HVM_XS_SYSTEM_PRODUCT_NAME "bios-strings/system-product-name" +#define HVM_XS_SYSTEM_VERSION "bios-strings/system-version" +#define HVM_XS_SYSTEM_SERIAL_NUMBER "bios-strings/system-serial-number" +#define HVM_XS_ENCLOSURE_MANUFACTURER "bios-strings/enclosure-manufacturer" +#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number" +#define HVM_XS_BATTERY_MANUFACTURER "bios-strings/battery-manufacturer" +#define HVM_XS_BATTERY_DEVICE_NAME "bios-strings/battery-device-name" + +/* 1 to 99 OEM strings can be set in xenstore using values of the form + * below. These strings will be loaded into the SMBIOS type 11 structure. + */ +#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" + +#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ diff --git a/sys/src/9/xen/xen-public/hvm/ioreq.h b/sys/src/9/xen/xen-public/hvm/ioreq.h new file mode 100644 index 000000000..f05d130c7 --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/ioreq.h @@ -0,0 +1,122 @@ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t _pad1:1; + uint8_t type; /* I/O type */ +}; +typedef struct ioreq ioreq_t; + +struct shared_iopage { + struct ioreq vcpu_ioreq[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +struct buf_ioreq { + uint8_t type; /* I/O type */ + uint8_t pad:1; + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ + uint32_t addr:20;/* physical address */ + uint32_t data; /* data */ +}; +typedef struct buf_ioreq buf_ioreq_t; + +#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ +struct buffered_iopage { + unsigned int read_pointer; + unsigned int write_pointer; + buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. */ +typedef struct buffered_iopage buffered_iopage_t; + +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* Version 0 (default): Traditional Xen locations. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). */ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/hvm/params.h b/sys/src/9/xen/xen-public/hvm/params.h new file mode 100644 index 000000000..517a18410 --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/params.h @@ -0,0 +1,150 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include "hvm_op.h" + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 + +#if defined(__i386__) || defined(__x86_64__) + +/* Expose Viridian interfaces to this HVM guest? */ +#define HVM_PARAM_VIRIDIAN 9 + +#endif + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +/* + * Select location of ACPI PM1a and TMR control blocks. Currently two locations + * are supported, specified by version 0 or 1 in this parameter: + * - 0: default, use the old addresses + * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 + * - 1: use the new default qemu addresses + * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 + * You can find these address definitions in <hvm/ioreq.h> + */ +#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 + +/* Enable blocking memory events, async or sync (pause vcpu until response) + * onchangeonly indicates messages only on a change of value */ +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 +#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 +#define HVM_PARAM_MEMORY_EVENT_MSR 30 + +#define HVMPME_MODE_MASK (3 << 0) +#define HVMPME_mode_disabled 0 +#define HVMPME_mode_async 1 +#define HVMPME_mode_sync 2 +#define HVMPME_onchangeonly (1 << 2) + +/* Boolean: Enable nestedhvm (hvm only) */ +#define HVM_PARAM_NESTEDHVM 24 + +/* Params for the mem event rings */ +#define HVM_PARAM_PAGING_RING_PFN 27 +#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_SHARING_RING_PFN 29 + +/* SHUTDOWN_* action in case of a triple fault */ +#define HVM_PARAM_TRIPLE_FAULT_REASON 31 + +#define HVM_NR_PARAMS 32 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/sys/src/9/xen/xen-public/hvm/pvdrivers.h b/sys/src/9/xen/xen-public/hvm/pvdrivers.h new file mode 100644 index 000000000..4c6b705fb --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/pvdrivers.h @@ -0,0 +1,47 @@ +/* + * pvdrivers.h: Register of PV drivers product numbers. + * Copyright (c) 2012, Citrix Systems Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_PVDRIVERS_H_ +#define _XEN_PUBLIC_PVDRIVERS_H_ + +/* + * This is the master registry of product numbers for + * PV drivers. + * If you need a new product number allocating, please + * post to xen-devel@lists.xensource.com. You should NOT use + * a product number without allocating one. + * If you maintain a separate versioning and distribution path + * for PV drivers you should have a separate product number so + * that your drivers can be separated from others. + * + * During development, you may use the product ID to + * indicate a driver which is yet to be released. + */ + +#define PVDRIVERS_PRODUCT_LIST(EACH) \ + EACH("xensource-windows", 0x0001) /* Citrix */ \ + EACH("gplpv-windows", 0x0002) /* James Harper */ \ + EACH("linux", 0x0003) \ + EACH("experimental", 0xffff) + +#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */ diff --git a/sys/src/9/xen/xen-public/hvm/save.h b/sys/src/9/xen/xen-public/hvm/save.h new file mode 100644 index 000000000..cc8b5fdce --- /dev/null +++ b/sys/src/9/xen/xen-public/hvm/save.h @@ -0,0 +1,111 @@ +/* + * hvm/save.h + * + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_H__ +#define __XEN_PUBLIC_HVM_SAVE_H__ + +/* + * Structures in this header *must* have the same layout in 32bit + * and 64bit environments: this means that all fields must be explicitly + * sized types and aligned to their sizes, and the structs must be + * a multiple of eight bytes long. + * + * Only the state necessary for saving and restoring (i.e. fields + * that are analogous to actual hardware state) should go in this file. + * Internal mechanisms should be kept in Xen-private headers. + */ + +#if !defined(__GNUC__) || defined(__STRICT_ANSI__) +#error "Anonymous structs/unions are a GNU extension." +#endif + +/* + * Each entry is preceded by a descriptor giving its type and length + */ +struct hvm_save_descriptor { + uint16_t typecode; /* Used to demux the various types below */ + uint16_t instance; /* Further demux within a type */ + uint32_t length; /* In bytes, *not* including this descriptor */ +}; + + +/* + * Each entry has a datatype associated with it: for example, the CPU state + * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), + * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). + * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system + * ugliness. + */ + +#ifdef __XEN__ +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; } + +# include <xen/lib.h> /* BUG() */ +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; } +#else +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];} + +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];} +#endif + +#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) +#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) +#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) + +#ifdef __XEN__ +# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t) +# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x))) + +# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1) +# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst) +#endif + +/* + * The series of save records is teminated by a zero-type, zero-length + * descriptor. + */ + +struct hvm_save_end {}; +DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); + +#if defined(__i386__) || defined(__x86_64__) +#include "../arch-x86/hvm/save.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "../arch-arm/hvm/save.h" +#else +#error "unsupported architecture" +#endif + +#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */ diff --git a/sys/src/9/xen/xen-public/io/blkif.h b/sys/src/9/xen/xen-public/io/blkif.h new file mode 100644 index 000000000..b9b9d9857 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/blkif.h @@ -0,0 +1,556 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + * Copyright (c) 2012, Spectra Logic Corporation + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen block driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * All data in the XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + * XenStore nodes marked "DEPRECATED" in their notes section should only be + * used to provide interoperability with legacy implementations. + * + * See the XenBus state transition diagram below for details on when XenBus + * nodes must be published and when they can be queried. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * mode + * Values: "r" (read only), "w" (writable) + * + * The read or write access permissions to the backing store to be + * granted to the frontend. + * + * params + * Values: string + * + * A free formatted string providing sufficient information for the + * backend driver to open the backing device. (e.g. the path to the + * file or block device representing the backing store.) + * + * type + * Values: "file", "phy", "tap" + * + * The type of the backing device/object. + * + *--------------------------------- Features --------------------------------- + * + * feature-barrier + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-flush-cache + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-discard + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_DISCARD request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * + *----------------------- Request Transport Parameters ------------------------ + * + * max-ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Notes: 1, 3 + * + * The maximum supported size of the request ring buffer in units of + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * max-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Notes: DEPRECATED, 2, 3 + * + * The maximum supported size of the request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *------------------------- Backend Device Properties ------------------------- + * + * discard-aligment + * Values: <uint32_t> + * Default Value: 0 + * Notes: 4, 5 + * + * The offset, in bytes from the beginning of the virtual block device, + * to the first, addressable, discard extent on the underlying device. + * + * discard-granularity + * Values: <uint32_t> + * Default Value: <"sector-size"> + * Notes: 4 + * + * The size, in bytes, of the individually addressable discard extents + * of the underlying device. + * + * discard-secure + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD + * requests with the BLKIF_DISCARD_SECURE flag set. + * + * info + * Values: <uint32_t> (bitmap) + * + * A collection of bit flags describing attributes of the backing + * device. The VDISK_* macros define the meaning of each bit + * location. + * + * sector-size + * Values: <uint32_t> + * + * The logical sector size, in bytes, of the backend device. + * + * physical-sector-size + * Values: <uint32_t> + * + * The physical sector size, in bytes, of the backend device. + * + * sectors + * Values: <uint64_t> + * + * The size of the backend device, expressed in units of its logical + * sector size ("sector-size"). + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: <uint32_t> + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: <uint32_t> + * Notes: 6 + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * ring-ref%u + * Values: <uint32_t> + * Notes: 6 + * + * For a frontend providing a multi-page ring, a "number of ring pages" + * sized list of nodes, each containing a Xen grant reference granting + * permission for the backend to map the page of the ring located + * at page index "%u". Page indexes are zero based. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) + * Notes: 1, 3 + * + * The size of the frontend allocated request ring buffer in units + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * num-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) + * Notes: DEPRECATED, 2, 3 + * + * The size of the frontend allocated request ring buffer in units of + * machine pages. The value must be a power of 2. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. This will cause a performance degradation, + * since the the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + * + *------------------------- Virtual Device Properties ------------------------- + * + * device-type + * Values: "disk", "cdrom", "floppy", etc. + * + * virtual-device + * Values: <uint32_t> + * + * A value indicating the physical device to virtualize within the + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI + * disk", etc.) + * + * See docs/misc/vbd-interface.txt for details on the format of this + * value. + * + * Notes + * ----- + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer + * PV drivers. + * (2) Multi-page ring buffer scheme first used in some RedHat distributions + * including a distribution deployed on certain nodes of the Amazon + * EC2 cluster. + * (3) Support for multi-page ring buffers was implemented independently, + * in slightly different forms, by both Citrix and RedHat/Amazon. + * For full interoperability, block front and backends should publish + * identical ring parameters, adjusted for unit differences, to the + * XenStore nodes used in both schemes. + * (4) Devices that support discard functionality may internally allocate + * space (discardable extents) in units that are larger than the + * exported logical block size. + * (5) The discard-alignment parameter allows a physical device to be + * partitioned into virtual devices that do not necessarily begin or + * end on a discardable extent boundary. + * (6) When there is only a single page allocated to the request ring, + * 'ring-ref' is used to communicate the grant reference for this + * page to the backend. When using a multi-page ring, the 'ring-ref' + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. If + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the + * discarded region on the device must be rendered unrecoverable before the + * command returns. + * + * This operation is analogous to performing a trim (ATA) or unamp (SCSI), + * command on a native device. + * + * More information about trim/unmap operations can be found at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + * + * Optional. See "feature-discard", "discard-alignment", + * "discard-granularity", and "discard-secure" in the XenBus node + * documentation above. + */ +#define BLKIF_OP_DISCARD 5 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +/* + * NB. first_sect and last_sect in blkif_request_segment, as well as + * sector_number in blkif_request, are always expressed in 512-byte units. + * However they must be properly aligned to the real sector size of the + * physical disk, which is reported in the "physical-sector-size" node in + * the backend xenbus info. Also the xenbus "sectors" node is expressed in + * 512-byte units. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + uint8_t operation; /* BLKIF_OP_DISCARD */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to discard*/ +}; +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/console.h b/sys/src/9/xen/xen-public/io/console.h new file mode 100644 index 000000000..e2cd97f77 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/console.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/fbif.h b/sys/src/9/xen/xen-public/io/fbif.h new file mode 100644 index 000000000..cc25aab32 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/fbif.h @@ -0,0 +1,176 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend their preferred period of + * refresh. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodical refreshs. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +/* + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. + */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/fsif.h b/sys/src/9/xen/xen-public/io/fsif.h new file mode 100644 index 000000000..8fc21740e --- /dev/null +++ b/sys/src/9/xen/xen-public/io/fsif.h @@ -0,0 +1,192 @@ +/****************************************************************************** + * fsif.h + * + * Interface to FS level split device drivers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007, Grzegorz Milos, <gm281@cam.ac.uk>. + */ + +#ifndef __XEN_PUBLIC_IO_FSIF_H__ +#define __XEN_PUBLIC_IO_FSIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +#define REQ_FILE_OPEN 1 +#define REQ_FILE_CLOSE 2 +#define REQ_FILE_READ 3 +#define REQ_FILE_WRITE 4 +#define REQ_STAT 5 +#define REQ_FILE_TRUNCATE 6 +#define REQ_REMOVE 7 +#define REQ_RENAME 8 +#define REQ_CREATE 9 +#define REQ_DIR_LIST 10 +#define REQ_CHMOD 11 +#define REQ_FS_SPACE 12 +#define REQ_FILE_SYNC 13 + +struct fsif_open_request { + grant_ref_t gref; +}; + +struct fsif_close_request { + uint32_t fd; +}; + +struct fsif_read_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_write_request { + uint32_t fd; + int32_t pad; + uint64_t len; + uint64_t offset; + grant_ref_t grefs[1]; /* Variable length */ +}; + +struct fsif_stat_request { + uint32_t fd; +}; + +/* This structure is a copy of some fields from stat structure, returned + * via the ring. */ +struct fsif_stat_response { + int32_t stat_mode; + uint32_t stat_uid; + uint32_t stat_gid; + int32_t stat_ret; + int64_t stat_size; + int64_t stat_atime; + int64_t stat_mtime; + int64_t stat_ctime; +}; + +struct fsif_truncate_request { + uint32_t fd; + int32_t pad; + int64_t length; +}; + +struct fsif_remove_request { + grant_ref_t gref; +}; + +struct fsif_rename_request { + uint16_t old_name_offset; + uint16_t new_name_offset; + grant_ref_t gref; +}; + +struct fsif_create_request { + int8_t directory; + int8_t pad; + int16_t pad2; + int32_t mode; + grant_ref_t gref; +}; + +struct fsif_list_request { + uint32_t offset; + grant_ref_t gref; +}; + +#define NR_FILES_SHIFT 0 +#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */ +#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT) +#define ERROR_SIZE 32 /* 32 bits for the error mask */ +#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT) +#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT) +#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE) +#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT) + +struct fsif_chmod_request { + uint32_t fd; + int32_t mode; +}; + +struct fsif_space_request { + grant_ref_t gref; +}; + +struct fsif_sync_request { + uint32_t fd; +}; + + +/* FS operation request */ +struct fsif_request { + uint8_t type; /* Type of the request */ + uint8_t pad; + uint16_t id; /* Request ID, copied to the response */ + uint32_t pad2; + union { + struct fsif_open_request fopen; + struct fsif_close_request fclose; + struct fsif_read_request fread; + struct fsif_write_request fwrite; + struct fsif_stat_request fstat; + struct fsif_truncate_request ftruncate; + struct fsif_remove_request fremove; + struct fsif_rename_request frename; + struct fsif_create_request fcreate; + struct fsif_list_request flist; + struct fsif_chmod_request fchmod; + struct fsif_space_request fspace; + struct fsif_sync_request fsync; + } u; +}; +typedef struct fsif_request fsif_request_t; + +/* FS operation response */ +struct fsif_response { + uint16_t id; + uint16_t pad1; + uint32_t pad2; + union { + uint64_t ret_val; + struct fsif_stat_response fstat; + } u; +}; + +typedef struct fsif_response fsif_response_t; + +#define FSIF_RING_ENTRY_SIZE 64 + +#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \ + sizeof(grant_ref_t) + 1) +#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \ + sizeof(grant_ref_t) + 1) + +DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response); + +#define STATE_INITIALISED "init" +#define STATE_READY "ready" +#define STATE_CLOSING "closing" +#define STATE_CLOSED "closed" + + +#endif diff --git a/sys/src/9/xen/xen-public/io/kbdif.h b/sys/src/9/xen/xen-public/io/kbdif.h new file mode 100644 index 000000000..2d2aebdd3 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/kbdif.h @@ -0,0 +1,132 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* Pointer movement event */ +#define XENKBD_TYPE_MOTION 1 +/* Event type 2 currently not used */ +/* Key event (includes pointer buttons) */ +#define XENKBD_TYPE_KEY 3 +/* + * Pointer position event + * Capable backend sets feature-abs-pointer in xenstore. + * Frontend requests ot instead of XENKBD_TYPE_MOTION by setting + * request-abs-update in xenstore. + */ +#define XENKBD_TYPE_POS 4 + +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined. + */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/libxenvchan.h b/sys/src/9/xen/xen-public/io/libxenvchan.h new file mode 100644 index 000000000..5c3d3d418 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/libxenvchan.h @@ -0,0 +1,97 @@ +/** + * @file + * @section AUTHORS + * + * Copyright (C) 2010 Rafal Wojtczuk <rafal@invisiblethingslab.com> + * + * Authors: + * Rafal Wojtczuk <rafal@invisiblethingslab.com> + * Daniel De Graaf <dgdegra@tycho.nsa.gov> + * + * @section LICENSE + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * @section DESCRIPTION + * + * Originally borrowed from the Qubes OS Project, http://www.qubes-os.org, + * this code has been substantially rewritten to use the gntdev and gntalloc + * devices instead of raw MFNs and map_foreign_range. + * + * This is a library for inter-domain communication. A standard Xen ring + * buffer is used, with a datagram-based interface built on top. The grant + * reference and event channels are shared in XenStore under a user-specified + * path. + * + * The ring.h macros define an asymmetric interface to a shared data structure + * that assumes all rings reside in a single contiguous memory space. This is + * not suitable for vchan because the interface to the ring is symmetric except + * for the setup. Unlike the producer-consumer rings defined in ring.h, the + * size of the rings used in vchan are determined at execution time instead of + * compile time, so the macros in ring.h cannot be used to access the rings. + */ + +#include <stdint.h> +#include <sys/types.h> + +struct ring_shared { + uint32_t cons, prod; +}; + +#define VCHAN_NOTIFY_WRITE 0x1 +#define VCHAN_NOTIFY_READ 0x2 + +/** + * vchan_interface: primary shared data structure + */ +struct vchan_interface { + /** + * Standard consumer/producer interface, one pair per buffer + * left is client write, server read + * right is client read, server write + */ + struct ring_shared left, right; + /** + * size of the rings, which determines their location + * 10 - at offset 1024 in ring's page + * 11 - at offset 2048 in ring's page + * 12+ - uses 2^(N-12) grants to describe the multi-page ring + * These should remain constant once the page is shared. + * Only one of the two orders can be 10 (or 11). + */ + uint16_t left_order, right_order; + /** + * Shutdown detection: + * 0: client (or server) has exited + * 1: client (or server) is connected + * 2: client has not yet connected + */ + uint8_t cli_live, srv_live; + /** + * Notification bits: + * VCHAN_NOTIFY_WRITE: send notify when data is written + * VCHAN_NOTIFY_READ: send notify when data is read (consumed) + * cli_notify is used for the client to inform the server of its action + */ + uint8_t cli_notify, srv_notify; + /** + * Grant list: ordering is left, right. Must not extend into actual ring + * or grow beyond the end of the initial shared page. + * These should remain constant once the page is shared, to allow + * for possible remapping by a client that restarts. + */ + uint32_t grants[0]; +}; + diff --git a/sys/src/9/xen/xen-public/io/netif.h b/sys/src/9/xen/xen-public/io/netif.h new file mode 100644 index 000000000..d477751d7 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/netif.h @@ -0,0 +1,236 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * This is the 'wire' format for packets: + * Request 1: netif_tx_request -- NETTXF_* (any flags) + * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) + * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) + * Request 4: netif_tx_request -- NETTXF_more_data + * Request 5: netif_tx_request -- NETTXF_more_data + * ... + * Request N: netif_tx_request -- 0 + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF +struct netif_tx_request { + grant_ref_t gref; /* Reference to buffer page */ + uint16_t offset; /* Offset within buffer page */ + uint16_t flags; /* NETTXF_* */ + uint16_t id; /* Echoed in response message. */ + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MAX (4) + +/* netif_extra_info flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types - only TCPv4 currently supported. */ +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) + +/* + * This structure needs to fit within both netif_tx_request and + * netif_rx_response for compatibility. + */ +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + /* + * XEN_NETIF_EXTRA_TYPE_GSO: + */ + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + /* + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * Backend advertises availability via 'feature-multicast-control' + * xenbus node containing value '1'. + * Frontend requests this feature by advertising + * 'request-multicast-control' xenbus node containing value '1'. + * If multicast control is requested then multicast flooding is + * disabled and the frontend must explicitly register its interest + * in multicast groups using dummy transmit requests containing + * MCAST_{ADD,DEL} extra-info fragments. + */ + struct { + uint8_t addr[6]; /* Address to add/remove. */ + } mcast; + + uint16_t pad[3]; + } u; +}; +typedef struct netif_extra_info netif_extra_info_t; + +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ + uint16_t flags; /* NETRXF_* */ + int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ +#define NETIF_RSP_NULL 1 + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/pciif.h b/sys/src/9/xen/xen-public/io/pciif.h new file mode 100644 index 000000000..b7112eea5 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/pciif.h @@ -0,0 +1,124 @@ +/* + * PCI Backend/Frontend Common Data Structures & Macros + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) +#define _XEN_PCIB_AERHANDLER (1) +#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) +#define _XEN_PCIB_active (2) +#define XEN_PCIB_active (1<<_XEN_PCIB_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_aer_detected (6) +#define XEN_PCI_OP_aer_resume (7) +#define XEN_PCI_OP_aer_mmio (8) +#define XEN_PCI_OP_aer_slotreset (9) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +/* + * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) + * Should not exceed 128 + */ +#define SH_INFO_MAX_VEC 128 + +struct xen_msix_entry { + uint16_t vector; + uint16_t entry; +}; +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; + /* IN: Contains extra infor for this operation */ + uint32_t info; + /*IN: param for msi-x */ + struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; +}; + +/*used for pcie aer handling*/ +struct xen_pcie_aer_op +{ + + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + /*IN/OUT: return aer_op result or carry error_detected state as input*/ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment*/ + uint32_t bus; + uint32_t devfn; +}; +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; + struct xen_pcie_aer_op aer_op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/protocols.h b/sys/src/9/xen/xen-public/io/protocols.h new file mode 100644 index 000000000..80b196bc3 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/protocols.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff --git a/sys/src/9/xen/xen-public/io/ring.h b/sys/src/9/xen/xen-public/io/ring.h new file mode 100644 index 000000000..73e13d7ae --- /dev/null +++ b/sys/src/9/xen/xen-public/io/ring.h @@ -0,0 +1,312 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/tpmif.h b/sys/src/9/xen/xen-public/io/tpmif.h new file mode 100644 index 000000000..9743dc936 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/tpmif.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from tools/libxc/xen/io/netif.h + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +#include "../grant_table.h" + +struct tpmif_tx_request { + unsigned long addr; /* Machine address of packet. */ + grant_ref_t ref; /* grant table access reference */ + uint16_t unused; + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct tpmif_tx_request tpmif_tx_request_t; + +/* + * The TPMIF_TX_RING_SIZE defines the number of pages the + * front-end and backend can exchange (= size of array). + */ +typedef uint32_t TPMIF_RING_IDX; + +#define TPMIF_TX_RING_SIZE 1 + +/* This structure must fit in a memory page. */ + +struct tpmif_ring { + struct tpmif_tx_request req; +}; +typedef struct tpmif_ring tpmif_ring_t; + +struct tpmif_tx_interface { + struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; +}; +typedef struct tpmif_tx_interface tpmif_tx_interface_t; + +/****************************************************************************** + * TPM I/O interface for Xen guest OSes, v2 + * + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov> + * + * This protocol emulates the request/response behavior of a TPM using a Xen + * shared memory interface. All interaction with the TPM is at the direction + * of the frontend, since a TPM (hardware or virtual) is a passive device - + * the backend only processes commands as requested by the frontend. + * + * The frontend sends a request to the TPM by populating the shared page with + * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending + * and event channel notification. When the backend is finished, it will set + * the state to TPMIF_STATE_FINISH and send an event channel notification. + * + * In order to allow long-running commands to be canceled, the frontend can + * at any time change the state to TPMIF_STATE_CANCEL and send a notification. + * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH) + * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM + * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH + * if another reason for cancellation is required - for example, a physical + * TPM may cancel a command if the interface is seized by another locality. + * + * The TPM command format is defined by the TCG, and is available at + * http://www.trustedcomputinggroup.org/resources/tpm_main_specification + */ + +enum tpmif_state { + TPMIF_STATE_IDLE, /* no contents / vTPM idle / cancel complete */ + TPMIF_STATE_SUBMIT, /* request ready / vTPM working */ + TPMIF_STATE_FINISH, /* response ready / vTPM idle */ + TPMIF_STATE_CANCEL, /* cancel requested / vTPM working */ +}; +/* Note: The backend should only change state to IDLE or FINISH, while the + * frontend should only change to SUBMIT or CANCEL. Status changes do not need + * to use atomic operations. + */ + + +/* The shared page for vTPM request/response packets looks like: + * + * Offset Contents + * ================================================= + * 0 struct tpmif_shared_page + * 16 [optional] List of grant IDs + * 16+4*nr_extra_pages TPM packet data + * + * If the TPM packet data extends beyond the end of a single page, the grant IDs + * defined in extra_pages are used as if they were mapped immediately following + * the primary shared page. The grants are allocated by the frontend and mapped + * by the backend. Before sending a request spanning multiple pages, the + * frontend should verify that the TPM supports such large requests by querying + * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM. + */ +struct tpmif_shared_page { + uint32_t length; /* request/response length in bytes */ + + uint8_t state; /* enum tpmif_state */ + uint8_t locality; /* for the current request */ + uint8_t pad; /* should be zero */ + + uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ + uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */ +}; +typedef struct tpmif_shared_page tpmif_shared_page_t; + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/usbif.h b/sys/src/9/xen/xen-public/io/usbif.h new file mode 100644 index 000000000..0af2a38bb --- /dev/null +++ b/sys/src/9/xen/xen-public/io/usbif.h @@ -0,0 +1,150 @@ +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +enum usb_spec_version { + USB_VER_UNKNOWN = 0, + USB_VER_USB11, + USB_VER_USB20, + USB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in usbif_request + * + * bits 0-5 are specific bits for virtual USB driver. + * bits 7-31 are standard urb pipe. + * + * - port number(NEW): bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag(NEW): bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ +#define usbif_pipeportnum(pipe) ((pipe) & 0x1f) +#define usbif_setportnum_pipe(pipe, portnum) \ + ((pipe)|(portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & 0x20) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20)) + +#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) + +/* + * RING for transferring urbs. + */ +struct usbif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct usbif_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct usbif_urb_request usbif_urb_request_t; + +struct usbif_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; +typedef struct usbif_urb_response usbif_urb_response_t; + +DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct usbif_conn_request { + uint16_t id; +}; +typedef struct usbif_conn_request usbif_conn_request_t; + +struct usbif_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +}; +typedef struct usbif_conn_response usbif_conn_response_t; + +DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ diff --git a/sys/src/9/xen/xen-public/io/vscsiif.h b/sys/src/9/xen/xen-public/io/vscsiif.h new file mode 100644 index 000000000..a50f980e9 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/vscsiif.h @@ -0,0 +1,117 @@ +/****************************************************************************** + * vscsiif.h + * + * Based on the blkif.h code. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright(c) FUJITSU Limited 2008. + */ + +#ifndef __XEN__PUBLIC_IO_SCSI_H__ +#define __XEN__PUBLIC_IO_SCSI_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* commands between backend and frontend */ +#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */ +#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/ +#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/ +#define VSCSIIF_ACT_SCSI_SG_PRESET 4 /* Preset SG elements */ + +/* + * Maximum scatter/gather segments per request. + * + * Considering balance between allocating at least 16 "vscsiif_request" + * structures on one page (4096 bytes) and the number of scatter/gather + * elements needed, we decided to use 26 as a magic number. + */ +#define VSCSIIF_SG_TABLESIZE 26 + +/* + * based on Linux kernel 2.6.18 + */ +#define VSCSIIF_MAX_COMMAND_SIZE 16 +#define VSCSIIF_SENSE_BUFFERSIZE 96 + +struct scsiif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; +typedef struct scsiif_request_segment vscsiif_segment_t; + +struct vscsiif_request { + uint16_t rqid; /* private guest value, echoed in resp */ + uint8_t act; /* command between backend and frontend */ + uint8_t cmd_len; + + uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; + uint16_t timeout_per_command; /* The command is issued by twice + the value in Backend. */ + uint16_t channel, id, lun; + uint16_t padding; + uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1) + DMA_FROM_DEVICE(2) + DMA_NONE(3) requests */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ + + vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE]; + uint32_t reserved[3]; +}; +typedef struct vscsiif_request vscsiif_request_t; + +#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \ + / sizeof(vscsiif_segment_t)) + +struct vscsiif_sg_list { + /* First two fields must match struct vscsiif_request! */ + uint16_t rqid; /* private guest value, must match main req */ + uint8_t act; /* VSCSIIF_ACT_SCSI_SG_PRESET */ + uint8_t nr_segments; /* Number of pieces of scatter-gather */ + vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE]; +}; +typedef struct vscsiif_sg_list vscsiif_sg_list_t; + +struct vscsiif_response { + uint16_t rqid; + uint8_t act; /* valid only when backend supports SG_PRESET */ + uint8_t sense_len; + uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; + int32_t rslt; + uint32_t residual_len; /* request bufflen - + return the value from physical device */ + uint32_t reserved[36]; +}; +typedef struct vscsiif_response vscsiif_response_t; + +DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response); + + +#endif /*__XEN__PUBLIC_IO_SCSI_H__*/ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/xenbus.h b/sys/src/9/xen/xen-public/io/xenbus.h new file mode 100644 index 000000000..927f9db55 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/xenbus.h @@ -0,0 +1,80 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/io/xs_wire.h b/sys/src/9/xen/xen-public/io/xs_wire.h new file mode 100644 index 000000000..585f0c8f5 --- /dev/null +++ b/sys/src/9/xen/xen-public/io/xs_wire.h @@ -0,0 +1,138 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#ifdef EINVAL +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) +}; +#endif + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. */ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/kexec.h b/sys/src/9/xen/xen-public/kexec.h new file mode 100644 index 000000000..36409ff58 --- /dev/null +++ b/sys/src/9/xen/xen-public/kexec.h @@ -0,0 +1,165 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Xen port written by: + * - Simon 'Horms' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. + * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. + */ + +#include "xen.h" + +#if defined(__i386__) || defined(__x86_64__) +#define KEXEC_XEN_NO_PAGES 17 +#endif + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, void *args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Kexec supports two types of operation: + * - kexec into a regular kernel, very similar to a standard reboot + * - KEXEC_TYPE_DEFAULT is used to specify this type + * - kexec into a special "crash kernel", aka kexec-on-panic + * - KEXEC_TYPE_CRASH is used to specify this type + * - parts of our system may be broken at kexec-on-panic time + * - the code should be kept as simple and self-contained as possible + */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + + +/* The kexec implementation for Xen allows the user to load two + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. + * All data needed for a kexec reboot is kept in one xen_kexec_image_t + * per "instance". The data mainly consists of machine address lists to pages + * together with destination addresses. The data in xen_kexec_image_t + * is passed to the "code page" which is one page of code that performs + * the final relocations before jumping to the new kernel. + */ + +typedef struct xen_kexec_image { +#if defined(__i386__) || defined(__x86_64__) + unsigned long page_list[KEXEC_XEN_NO_PAGES]; +#endif + unsigned long indirection_page; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + */ +#define KEXEC_CMD_kexec 0 +typedef struct xen_kexec_exec { + int type; +} xen_kexec_exec_t; + +/* + * Load/Unload kernel image for kexec or kdump. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * image == relocation information for kexec (ignored for unload) [in] + */ +#define KEXEC_CMD_kexec_load 1 +#define KEXEC_CMD_kexec_unload 2 +typedef struct xen_kexec_load { + int type; + xen_kexec_image_t image; +} xen_kexec_load_t; + +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ +#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap + * Note that although this is adjacent + * to Xen it exists in a separate EFI + * region on ia64, and thus needs to be + * inserted into iomem_machine separately */ +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of + * the ia64_boot_param */ +#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of + * of the EFI Memory Map */ +#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ + +/* + * Find the address and size of certain memory areas + * range == KEXEC_RANGE_... [in] + * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] + * size == number of bytes reserved in window [out] + * start == address of the first byte in the window [out] + */ +#define KEXEC_CMD_kexec_get_range 3 +typedef struct xen_kexec_range { + int range; + int nr; + unsigned long size; + unsigned long start; +} xen_kexec_range_t; + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/mem_event.h b/sys/src/9/xen/xen-public/mem_event.h new file mode 100644 index 000000000..c9ed54650 --- /dev/null +++ b/sys/src/9/xen/xen-public/mem_event.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * mem_event.h + * + * Memory event common structures. + * + * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _XEN_PUBLIC_MEM_EVENT_H +#define _XEN_PUBLIC_MEM_EVENT_H + +#include "xen.h" +#include "io/ring.h" + +/* Memory event flags */ +#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) +#define MEM_EVENT_FLAG_DROP_PAGE (1 << 1) +#define MEM_EVENT_FLAG_EVICT_FAIL (1 << 2) +#define MEM_EVENT_FLAG_FOREIGN (1 << 3) +#define MEM_EVENT_FLAG_DUMMY (1 << 4) + +/* Reasons for the memory event request */ +#define MEM_EVENT_REASON_UNKNOWN 0 /* typical reason */ +#define MEM_EVENT_REASON_VIOLATION 1 /* access violation, GFN is address */ +#define MEM_EVENT_REASON_CR0 2 /* CR0 was hit: gfn is CR0 value */ +#define MEM_EVENT_REASON_CR3 3 /* CR3 was hit: gfn is CR3 value */ +#define MEM_EVENT_REASON_CR4 4 /* CR4 was hit: gfn is CR4 value */ +#define MEM_EVENT_REASON_INT3 5 /* int3 was hit: gla/gfn are RIP */ +#define MEM_EVENT_REASON_SINGLESTEP 6 /* single step was invoked: gla/gfn are RIP */ +#define MEM_EVENT_REASON_MSR 7 /* MSR was hit: gfn is MSR value, gla is MSR address; + does NOT honour HVMPME_onchangeonly */ + +typedef struct mem_event_st { + uint32_t flags; + uint32_t vcpu_id; + + uint64_t gfn; + uint64_t offset; + uint64_t gla; /* if gla_valid */ + + uint32_t p2mt; + + uint16_t access_r:1; + uint16_t access_w:1; + uint16_t access_x:1; + uint16_t gla_valid:1; + uint16_t available:12; + + uint16_t reason; +} mem_event_request_t, mem_event_response_t; + +DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t); + +#endif + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/memory.h b/sys/src/9/xen/xen-public/memory.h new file mode 100644 index 000000000..98cf1f7d8 --- /dev/null +++ b/sys/src/9/xen/xen-public/memory.h @@ -0,0 +1,472 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +#include "xen.h" + +/* + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. This field is not used by + * XENMEM_decrease_reservation. + */ +#define XENMEMF_address_bits(x) (x) +#define XENMEMF_get_address_bits(x) ((x) & 0xffu) +/* NUMA node to allocate from. */ +#define XENMEMF_node(x) (((x) + 1) << 8) +#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) +#endif + +struct xen_memory_reservation { + + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + * XENMEM_claim_pages: + * IN: must be zero + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + xen_ulong_t nr_extents; + unsigned int extent_order; + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 + /* XENMEMF flags. */ + unsigned int mem_flags; +#else + unsigned int address_bits; +#endif + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); + +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; +}; +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); + +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. + */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. + */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than max_e * 2MB. + */ + unsigned int nr_extents; +}; +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); + +/* Source mapping space. */ +/* ` enum phys_map_space { */ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, XENMEM_add_to_physmap_range only. */ +/* ` } */ + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Number of pages to go through for gmfn_range */ + uint16_t size; + + unsigned int space; /* => enum phys_map_space */ + +#define XENMAPIDX_grant_table_status 0x80000000 + + /* Index into space being mapped. */ + xen_ulong_t idx; + + /* GPFN in domid where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); + +/* A batched version of add_to_physmap. */ +#define XENMEM_add_to_physmap_range 23 +struct xen_add_to_physmap_range { + /* IN */ + /* Which domain to change the mapping for. */ + domid_t domid; + uint16_t space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + uint16_t size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + XEN_GUEST_HANDLE(xen_ulong_t) idxs; + + /* GPFN in domid where the source mapping page should appear. */ + XEN_GUEST_HANDLE(xen_pfn_t) gpfns; + + /* OUT */ + + /* Per index error code. */ + XEN_GUEST_HANDLE(int) errs; +}; +typedef struct xen_add_to_physmap_range xen_add_to_physmap_range_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t); + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. + */ +#define XENMEM_get_sharing_freed_pages 18 +#define XENMEM_get_sharing_shared_pages 19 + +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_aligned_t buffer; + /* Other OPs */ + uint64_aligned_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 +#define XENMEM_sharing_op_audit 8 + +#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +/* The following allows sharing of grant refs. This is useful + * for sharing utilities sitting as "filters" in IO backends + * (e.g. memshr + blktap(2)). The IO backend is only exposed + * to grant references, and this allows sharing of the grefs */ +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) + +#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) +#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ + ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) +#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ + ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) + +struct xen_mem_sharing_op { + uint8_t op; /* XENMEM_sharing_op_* */ + domid_t domain; + + union { + struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to nominate */ + uint32_t grant_ref; /* IN: grant ref to nominate */ + } u; + uint64_aligned_t handle; /* OUT: the handle */ + } nominate; + struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ + uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ + uint64_aligned_t source_handle; /* IN: handle to the source page */ + uint64_aligned_t client_gfn; /* IN: the client gfn */ + uint64_aligned_t client_handle; /* IN: handle to the client page */ + domid_t client_domain; /* IN: the client domain id */ + } share; + struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to debug */ + uint64_aligned_t mfn; /* IN: mfn to debug */ + uint32_t gref; /* IN: gref to debug */ + } u; + } debug; + } u; +}; +typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); + +/* + * Attempt to stake a claim for a domain on a quantity of pages + * of system RAM, but _not_ assign specific pageframes. Only + * arithmetic is performed so the hypercall is very fast and need + * not be preemptible, thus sidestepping time-of-check-time-of-use + * races for memory allocation. Returns 0 if the hypervisor page + * allocator has atomically and successfully claimed the requested + * number of pages, else non-zero. + * + * Any domain may have only one active claim. When sufficient memory + * has been allocated to resolve the claim, the claim silently expires. + * Claiming zero pages effectively resets any outstanding claim and + * is always successful. + * + * Note that a valid claim may be staked even after memory has been + * allocated for a domain. In this case, the claim is not incremental, + * i.e. if the domain's tot_pages is 3, and a claim is staked for 10, + * only 7 additional pages are claimed. + * + * Caller must be privileged or the hypercall fails. + */ +#define XENMEM_claim_pages 24 + +/* + * XENMEM_claim_pages flags - the are no flags at this time. + * The zero value is appropiate. + */ + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/nmi.h b/sys/src/9/xen/xen-public/nmi.h new file mode 100644 index 000000000..604b2938e --- /dev/null +++ b/sys/src/9/xen/xen-public/nmi.h @@ -0,0 +1,85 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +#include "xen.h" + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) +#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */ + /* Parity error reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_parity_error 1 +#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) +#endif + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. + */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +typedef struct xennmi_callback xennmi_callback_t; +DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/physdev.h b/sys/src/9/xen/xen-public/physdev.h new file mode 100644 index 000000000..db7e37af6 --- /dev/null +++ b/sys/src/9/xen/xen-public/physdev.h @@ -0,0 +1,376 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. + */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. + */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN */ + int index; + /* IN or OUT */ + int pirq; + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + +/* + * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() + * hypercall since 0x00030202. + */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + +#define PHYSDEVOP_DBGP_RESET_PREPARE 1 +#define PHYSDEVOP_DBGP_RESET_DONE 2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 +#define PHYSDEVOP_DBGP_BUS_PCI 1 + +#define PHYSDEVOP_dbgp_op 29 +struct physdev_dbgp_op { + /* IN */ + uint8_t op; + uint8_t bus; + union { + struct physdev_pci_device pci; + } u; +}; +typedef struct physdev_dbgp_op physdev_dbgp_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +/* + * These all-capitals physdev operation names are superceded by the new names + * (defined above) since interface version 0x00030202. + */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared + +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/platform.h b/sys/src/9/xen/xen-public/platform.h new file mode 100644 index 000000000..4341f5462 --- /dev/null +++ b/sys/src/9/xen/xen-public/platform.h @@ -0,0 +1,572 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include "xen.h" + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system time was <system_time>. + */ +#define XENPF_settime 17 +struct xenpf_settime { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +typedef struct xenpf_settime xenpf_settime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_add_memtype xenpf_add_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_del_memtype xenpf_del_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +typedef struct xenpf_read_memtype xenpf_read_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +typedef struct xenpf_microcode_update xenpf_microcode_update_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); + +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. + */ + uint32_t misc; + unsigned long status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + unsigned long size; + XEN_GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + unsigned long size; + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + +#define XEN_EFI_VARINFO_BOOT_SNAPSHOT 0x00000001 + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t max_capsule_size; + unsigned int reset_type; + } query_capsule_capabilities; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 +#define XEN_FW_EFI_PCI_ROM 5 +#define XEN_FW_KBD_SHIFT_FLAGS 5 +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. */ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + struct { + /* IN variables */ + uint16_t segment; + uint8_t bus; + uint8_t devfn; + uint16_t vendor; + uint16_t devid; + /* OUT variables */ + uint64_t address; + xen_ulong_t size; + } pci_rom; + } efi_info; /* XEN_FW_EFI_INFO */ + + /* Int16, Fn02: Get keyboard shift flags. */ + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ +#if __XEN_INTERFACE_VERSION__ < 0x00040300 + uint16_t pm1a_cnt_val; /* PM1a control value. */ + uint16_t pm1b_cnt_val; /* PM1b control value. */ +#else + uint16_t val_a; /* PM1a control / sleep type A. */ + uint16_t val_b; /* PM1b control / sleep type B. */ +#endif + uint32_t sleep_state; /* Which state to enter (Sn). */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 + uint32_t flags; /* XENPF_ACPI_SLEEP_*. */ +}; +typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq 52 +struct xenpf_change_freq { + /* IN variables */ + uint32_t flags; /* Must be zero. */ + uint32_t cpu; /* Physical cpu. */ + uint64_t freq; /* New frequency (Hz). */ +}; +typedef struct xenpf_change_freq xenpf_change_freq_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. + */ +#define XENPF_getidletime 53 +struct xenpf_getidletime { + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + XEN_GUEST_HANDLE(uint8) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ + XEN_GUEST_HANDLE(uint64) idletime; + /* OUT variables */ + /* System time when the idletime snapshots were taken. */ + uint64_t now; +}; +typedef struct xenpf_getidletime xenpf_getidletime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo 54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX 1 +#define XEN_PROCESSOR_PM_PX 2 +#define XEN_PROCESSOR_PM_TX 4 + +/* cmd type */ +#define XEN_PM_CX 0 +#define XEN_PM_PX 1 +#define XEN_PM_TX 2 +#define XEN_PM_PDC 3 + +/* Px sub info type */ +#define XEN_PX_PCT 1 +#define XEN_PX_PSS 2 +#define XEN_PX_PPC 4 +#define XEN_PX_PSD 8 + +struct xen_power_register { + uint32_t space_id; + uint32_t bit_width; + uint32_t bit_offset; + uint32_t access_size; + uint64_t address; +}; + +struct xen_processor_csd { + uint32_t domain; /* domain number of one dependent group */ + uint32_t coord_type; /* coordination type */ + uint32_t num; /* number of processors in same domain */ +}; +typedef struct xen_processor_csd xen_processor_csd_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); + +struct xen_processor_cx { + struct xen_power_register reg; /* GAS for Cx trigger register */ + uint8_t type; /* cstate value, c0: 0, c1: 1, ... */ + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ + uint32_t power; /* average power consumption(mW) */ + uint32_t dpcnt; /* number of dependency entries */ + XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ +}; +typedef struct xen_processor_cx xen_processor_cx_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); + +struct xen_processor_flags { + uint32_t bm_control:1; + uint32_t bm_check:1; + uint32_t has_cst:1; + uint32_t power_setup_done:1; + uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { + uint32_t count; /* number of C state entries in array below */ + struct xen_processor_flags flags; /* global flags of this processor */ + XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ +}; + +struct xen_pct_register { + uint8_t descriptor; + uint16_t length; + uint8_t space_id; + uint8_t bit_width; + uint8_t bit_offset; + uint8_t reserved; + uint64_t address; +}; + +struct xen_processor_px { + uint64_t core_frequency; /* megahertz */ + uint64_t power; /* milliWatts */ + uint64_t transition_latency; /* microseconds */ + uint64_t bus_master_latency; /* microseconds */ + uint64_t control; /* control value */ + uint64_t status; /* success indicator */ +}; +typedef struct xen_processor_px xen_processor_px_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); + +struct xen_psd_package { + uint64_t num_entries; + uint64_t revision; + uint64_t domain; + uint64_t coord_type; + uint64_t num_processors; +}; + +struct xen_processor_performance { + uint32_t flags; /* flag for Px sub info type */ + uint32_t platform_limit; /* Platform limitation on freq usage */ + struct xen_pct_register control_register; + struct xen_pct_register status_register; + uint32_t state_count; /* total available performance states */ + XEN_GUEST_HANDLE(xen_processor_px_t) states; + struct xen_psd_package domain_info; + uint32_t shared_type; /* coordination type of this processor */ +}; +typedef struct xen_processor_performance xen_processor_performance_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); + +struct xenpf_set_processor_pminfo { + /* IN variables */ + uint32_t id; /* ACPI CPU ID */ + uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ + } u; +}; +typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); + +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Correponding xen_cpuid is not present*/ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + +#define XENPF_get_cpu_version 48 +struct xenpf_pcpu_version { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + uint32_t max_present; + char vendor_id[12]; + uint32_t family; + uint32_t model; + uint32_t stepping; +}; +typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol +{ + uint32_t cpuid; +}; +typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd +{ + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually be idled */ + uint32_t idle_nums; +}; +typedef struct xenpf_core_parking xenpf_core_parking_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_platform_op(const struct xen_platform_op*); + */ +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime settime; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; + struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; + struct xenpf_change_freq change_freq; + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; + uint8_t pad[128]; + } u; +}; +typedef struct xen_platform_op xen_platform_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/sched.h b/sys/src/9/xen/xen-public/sched.h new file mode 100644 index 000000000..a30b11d96 --- /dev/null +++ b/sys/src/9/xen/xen-public/sched.h @@ -0,0 +1,174 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * `incontents 150 sched Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* ` enum sched_op { // SCHEDOP_* => struct sched_* */ +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown_t structure. + * + * If the sched_shutdown_t reason is SHUTDOWN_suspend then this + * hypercall takes an additional extra argument which should be the + * MFN of the guest's start_info_t. + * + * In addition, which reason is SHUTDOWN_suspend this hypercall + * returns 1 if suspend was cancelled or the domain was merely + * checkpointed, and 0 if it is resuming in a new domain. + */ +#define SCHEDOP_shutdown 2 + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll_t structure. + */ +#define SCHEDOP_poll 3 + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown_t structure. + */ +#define SCHEDOP_remote_shutdown 4 + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == sched_shutdown_t, as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog_t structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +/* ` } */ + +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +/* ` enum sched_shutdown_reason { */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ +#define SHUTDOWN_MAX 4 /* Maximum valid shutdown reason. */ +/* ` } */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/sysctl.h b/sys/src/9/xen/xen-public/sysctl.h new file mode 100644 index 000000000..8437d31e1 --- /dev/null +++ b/sys/src/9/xen/xen-public/sysctl.h @@ -0,0 +1,694 @@ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A + +/* + * Read console content from Xen buffer ring. + */ +/* XEN_SYSCTL_readconsole */ +struct xen_sysctl_readconsole { + /* IN: Non-zero -> clear after reading. */ + uint8_t clear; + /* IN: Non-zero -> start index specified by @index field. */ + uint8_t incremental; + uint8_t pad0, pad1; + /* + * IN: Start index for consuming from ring buffer (if @incremental); + * OUT: End index after consuming from ring buffer. + */ + uint32_t index; + /* IN: Virtual address to write console data. */ + XEN_GUEST_HANDLE_64(char) buffer; + /* IN: Size of buffer; OUT: Bytes written to buffer. */ + uint32_t count; +}; +typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); + +/* Get trace buffers machine base address */ +/* XEN_SYSCTL_tbuf_op */ +struct xen_sysctl_tbuf_op { + /* IN variables */ +#define XEN_SYSCTL_TBUFOP_get_info 0 +#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 +#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 +#define XEN_SYSCTL_TBUFOP_set_size 3 +#define XEN_SYSCTL_TBUFOP_enable 4 +#define XEN_SYSCTL_TBUFOP_disable 5 + uint32_t cmd; + /* IN/OUT variables */ + struct xenctl_bitmap cpu_mask; + uint32_t evt_mask; + /* OUT variables */ + uint64_aligned_t buffer_mfn; + uint32_t size; /* Also an IN variable! */ +}; +typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); + +/* + * Get physical information about the host machine + */ +/* XEN_SYSCTL_physinfo */ + /* (x86) The platform supports HVM guests. */ +#define _XEN_SYSCTL_PHYSCAP_hvm 0 +#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) + /* (x86) The platform supports HVM-guest direct access to I/O devices. */ +#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 +#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ + uint32_t cpu_khz; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint64_aligned_t outstanding_pages; + uint32_t hw_cap[8]; + + /* XEN_SYSCTL_PHYSCAP_??? */ + uint32_t capabilities; +}; +typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); + +/* + * Get the ID of the current scheduler. + */ +/* XEN_SYSCTL_sched_id */ +struct xen_sysctl_sched_id { + /* OUT variable */ + uint32_t sched_id; +}; +typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); + +/* Interface for controlling Xen software performance counters. */ +/* XEN_SYSCTL_perfc_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ +#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ +struct xen_sysctl_perfc_desc { + char name[80]; /* name of perf counter */ + uint32_t nr_vals; /* number of values for this counter */ +}; +typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); +typedef uint32_t xen_sysctl_perfc_val_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); + +struct xen_sysctl_perfc_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ + /* OUT variables. */ + uint32_t nr_counters; /* number of counters description */ + uint32_t nr_vals; /* number of values */ + /* counter information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + /* counter values (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; +}; +typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); + +/* XEN_SYSCTL_getdomaininfolist */ +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; +typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); + +/* Inject debug keys into Xen. */ +/* XEN_SYSCTL_debug_keys */ +struct xen_sysctl_debug_keys { + /* IN variables. */ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. */ +/* XEN_SYSCTL_getcpuinfo */ +struct xen_sysctl_cpuinfo { + uint64_aligned_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + +/* XEN_SYSCTL_availheap */ +struct xen_sysctl_availheap { + /* IN variables. */ + uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ + uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ + int32_t node; /* NUMA node of interest (-1 for all nodes). */ + /* OUT variables. */ + uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */ +}; +typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); + +/* XEN_SYSCTL_get_pmstat */ +struct pm_px_val { + uint64_aligned_t freq; /* Px core frequency */ + uint64_aligned_t residency; /* Px residency time */ + uint64_aligned_t count; /* Px transition count */ +}; +typedef struct pm_px_val pm_px_val_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); + +struct pm_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ + XEN_GUEST_HANDLE_64(pm_px_val_t) pt; +}; +typedef struct pm_px_stat pm_px_stat_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); + +struct pm_cx_stat { + uint32_t nr; /* entry nr in triggers & residencies, including C0 */ + uint32_t last; /* last Cx state */ + uint64_aligned_t idle_time; /* idle time from boot */ + XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ + XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ + uint64_aligned_t pc2; + uint64_aligned_t pc3; + uint64_aligned_t pc6; + uint64_aligned_t pc7; + uint64_aligned_t cc3; + uint64_aligned_t cc6; + uint64_aligned_t cc7; +}; + +struct xen_sysctl_get_pmstat { +#define PMSTAT_CATEGORY_MASK 0xf0 +#define PMSTAT_PX 0x10 +#define PMSTAT_CX 0x20 +#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) +#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) +#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) +#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) +#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) +#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) + uint32_t type; + uint32_t cpuid; + union { + struct pm_px_stat getpx; + struct pm_cx_stat getcx; + /* other struct for tx, etc */ + } u; +}; +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); + +/* XEN_SYSCTL_cpu_hotplug */ +struct xen_sysctl_cpu_hotplug { + /* IN variables */ + uint32_t cpu; /* Physical cpu. */ +#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 +#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 + uint32_t op; /* hotplug opcode */ +}; +typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); + +/* + * Get/set xen power management, include + * 1. cpufreq governors and related parameters + */ +/* XEN_SYSCTL_pm_op */ +struct xen_userspace { + uint32_t scaling_setspeed; +}; +typedef struct xen_userspace xen_userspace_t; + +struct xen_ondemand { + uint32_t sampling_rate_max; + uint32_t sampling_rate_min; + + uint32_t sampling_rate; + uint32_t up_threshold; +}; +typedef struct xen_ondemand xen_ondemand_t; + +/* + * cpufreq para name of this structure named + * same as sysfs file name of native linux + */ +#define CPUFREQ_NAME_LEN 16 +struct xen_get_cpufreq_para { + /* IN/OUT variable */ + uint32_t cpu_num; + uint32_t freq_num; + uint32_t gov_num; + + /* for all governors */ + /* OUT variable */ + XEN_GUEST_HANDLE_64(uint32) affected_cpus; + XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; + XEN_GUEST_HANDLE_64(char) scaling_available_governors; + char scaling_driver[CPUFREQ_NAME_LEN]; + + uint32_t cpuinfo_cur_freq; + uint32_t cpuinfo_max_freq; + uint32_t cpuinfo_min_freq; + uint32_t scaling_cur_freq; + + char scaling_governor[CPUFREQ_NAME_LEN]; + uint32_t scaling_max_freq; + uint32_t scaling_min_freq; + + /* for specific governor */ + union { + struct xen_userspace userspace; + struct xen_ondemand ondemand; + } u; + + int32_t turbo_enabled; +}; + +struct xen_set_cpufreq_gov { + char scaling_governor[CPUFREQ_NAME_LEN]; +}; + +struct xen_set_cpufreq_para { + #define SCALING_MAX_FREQ 1 + #define SCALING_MIN_FREQ 2 + #define SCALING_SETSPEED 3 + #define SAMPLING_RATE 4 + #define UP_THRESHOLD 5 + + uint32_t ctrl_type; + uint32_t ctrl_value; +}; + +struct xen_sysctl_pm_op { + #define PM_PARA_CATEGORY_MASK 0xf0 + #define CPUFREQ_PARA 0x10 + + /* cpufreq command type */ + #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) + #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) + #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) + #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) + + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + + /* cpuidle max_cstate access command */ + #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + + /* enable/disable turbo mode when in dbs governor */ + #define XEN_SYSCTL_pm_op_enable_turbo 0x26 + #define XEN_SYSCTL_pm_op_disable_turbo 0x27 + + uint32_t cmd; + uint32_t cpuid; + union { + struct xen_get_cpufreq_para get_para; + struct xen_set_cpufreq_gov set_gov; + struct xen_set_cpufreq_para set_para; + uint64_aligned_t get_avgfreq; + uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; + } u; +}; + +/* XEN_SYSCTL_page_offline_op */ +struct xen_sysctl_page_offline_op { + /* IN: range of page to be offlined */ +#define sysctl_page_offline 1 +#define sysctl_page_online 2 +#define sysctl_query_page_offline 3 + uint32_t cmd; + uint32_t start; + uint32_t end; + /* OUT: result of page offline request */ + /* + * bit 0~15: result flags + * bit 16~31: owner + */ + XEN_GUEST_HANDLE(uint32) status; +}; + +#define PG_OFFLINE_STATUS_MASK (0xFFUL) + +/* The result is invalid, i.e. HV does not handle it */ +#define PG_OFFLINE_INVALID (0x1UL << 0) + +#define PG_OFFLINE_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_PENDING (0x1UL << 2) +#define PG_OFFLINE_FAILED (0x1UL << 3) +#define PG_OFFLINE_AGAIN (0x1UL << 4) + +#define PG_ONLINE_FAILED PG_OFFLINE_FAILED +#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED + +#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) +#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) +#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) + +#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) + +/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ +#define PG_OFFLINE_XENPAGE (0x1UL << 8) +#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) +#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) +#define PG_OFFLINE_OWNED (0x1UL << 12) + +#define PG_OFFLINE_BROKEN (0x1UL << 13) +#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN + +#define PG_OFFLINE_OWNER_SHIFT 16 + +/* XEN_SYSCTL_lockprof_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ +#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ +/* Record-type: */ +#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ +#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ +#define LOCKPROF_TYPE_N 2 /* number of types */ +struct xen_sysctl_lockprof_data { + char name[40]; /* lock name (may include up to 2 %d specifiers) */ + int32_t type; /* LOCKPROF_TYPE_??? */ + int32_t idx; /* index (e.g. domain id) */ + uint64_aligned_t lock_cnt; /* # of locking succeeded */ + uint64_aligned_t block_cnt; /* # of wait for lock */ + uint64_aligned_t lock_time; /* nsecs lock held */ + uint64_aligned_t block_time; /* nsecs waited for lock */ +}; +typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); +struct xen_sysctl_lockprof_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ + uint32_t max_elem; /* size of output buffer */ + /* OUT variables (query only). */ + uint32_t nr_elem; /* number of elements available */ + uint64_aligned_t time; /* nsecs of profile measurement */ + /* profile information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; +}; +typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); + +/* XEN_SYSCTL_topologyinfo */ +#define INVALID_TOPOLOGY_ID (~0U) +struct xen_sysctl_topologyinfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + * If OUT is leass than IN then the array tails are not written by sysctl. + */ + uint32_t max_cpu_index; + + /* + * If not NULL, these arrays are filled with core/socket/node identifier + * for each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written to each array. + * The number of array elements written by the sysctl is: + * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +/* XEN_SYSCTL_numainfo */ +#define INVALID_NUMAINFO_ID (~0U) +struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest node identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + */ + uint32_t max_node_index; + + /* NB. Entries are 0 if node is not present. */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + /* + * Array, of size (max_node_index+1)^2, listing memory access distances + * between nodes. If an entry has no node distance information (e.g., node + * not present) then the value ~0u is written. + * + * Note that the array rows must be indexed by multiplying by the minimum + * of the caller-provided max_node_index and the returned value of + * max_node_index. That is, if the largest node index in the system is + * smaller than the caller can handle, a smaller 2-d array is constructed + * within the space provided by the caller. When this occurs, trailing + * space provided by the caller is not modified. If the largest node index + * in the system is larger than the caller can handle, then a 2-d array of + * the maximum size handleable by the caller is constructed. + */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + +/* XEN_SYSCTL_cpupool_op */ +#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ +#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ +#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ +#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ +#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ +#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ +#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ +#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF +struct xen_sysctl_cpupool_op { + uint32_t op; /* IN */ + uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ + uint32_t sched_id; /* IN: C OUT: I */ + uint32_t domid; /* IN: M */ + uint32_t cpu; /* IN: AR */ + uint32_t n_dom; /* OUT: I */ + struct xenctl_bitmap cpumap; /* OUT: IF */ +}; +typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); + +#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 +/* + * This structure is used to pass a new ARINC653 schedule from a + * privileged domain (ie dom0) to Xen. + */ +struct xen_sysctl_arinc653_schedule { + /* major_frame holds the time for the new schedule's major frame + * in nanoseconds. */ + uint64_aligned_t major_frame; + /* num_sched_entries holds how many of the entries in the + * sched_entries[] array are valid. */ + uint8_t num_sched_entries; + /* The sched_entries array holds the actual schedule entries. */ + struct { + /* dom_handle must match a domain's UUID */ + xen_domain_handle_t dom_handle; + /* If a domain has multiple VCPUs, vcpu_id specifies which one + * this schedule entry applies to. It should be set to 0 if + * there is only one VCPU for the domain. */ + unsigned int vcpu_id; + /* runtime specifies the amount of time that should be allocated + * to this VCPU per major frame. It is specified in nanoseconds */ + uint64_aligned_t runtime; + } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; +}; +typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); + +struct xen_sysctl_credit_schedule { + /* Length of timeslice in milliseconds */ +#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 +#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 + unsigned tslice_ms; + /* Rate limit (minimum timeslice) in microseconds */ +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 + unsigned ratelimit_us; +}; +typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); + +/* XEN_SYSCTL_scheduler_op */ +/* Set or get info? */ +#define XEN_SYSCTL_SCHEDOP_putinfo 0 +#define XEN_SYSCTL_SCHEDOP_getinfo 1 +struct xen_sysctl_scheduler_op { + uint32_t cpupool_id; /* Cpupool whose scheduler is to be targetted. */ + uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ + uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ + union { + struct xen_sysctl_sched_arinc653 { + XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; + } sched_arinc653; + struct xen_sysctl_credit_schedule sched_credit; + } u; +}; +typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); + +/* XEN_SYSCTL_coverage_op */ +/* + * Get total size of information, to help allocate + * the buffer. The pointer points to a 32 bit value. + */ +#define XEN_SYSCTL_COVERAGE_get_total_size 0 + +/* + * Read coverage information in a single run + * You must use a tool to split them. + */ +#define XEN_SYSCTL_COVERAGE_read 1 + +/* + * Reset all the coverage counters to 0 + * No parameters. + */ +#define XEN_SYSCTL_COVERAGE_reset 2 + +/* + * Like XEN_SYSCTL_COVERAGE_read but reset also + * counters to 0 in a single call. + */ +#define XEN_SYSCTL_COVERAGE_read_and_reset 3 + +struct xen_sysctl_coverage_op { + uint32_t cmd; /* XEN_SYSCTL_COVERAGE_* */ + union { + uint32_t total_size; /* OUT */ + XEN_GUEST_HANDLE_64(uint8) raw_info; /* OUT */ + } u; +}; +typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t); + + +struct xen_sysctl { + uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 +#define XEN_SYSCTL_coverage_op 20 + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_readconsole readconsole; + struct xen_sysctl_tbuf_op tbuf_op; + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; + struct xen_sysctl_sched_id sched_id; + struct xen_sysctl_perfc_op perfc_op; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; + struct xen_sysctl_availheap availheap; + struct xen_sysctl_get_pmstat get_pmstat; + struct xen_sysctl_cpu_hotplug cpu_hotplug; + struct xen_sysctl_pm_op pm_op; + struct xen_sysctl_page_offline_op page_offline; + struct xen_sysctl_lockprof_op lockprof_op; + struct xen_sysctl_cpupool_op cpupool_op; + struct xen_sysctl_scheduler_op scheduler_op; + struct xen_sysctl_coverage_op coverage_op; + uint8_t pad[128]; + } u; +}; +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/tmem.h b/sys/src/9/xen/xen-public/tmem.h new file mode 100644 index 000000000..bf537988a --- /dev/null +++ b/sys/src/9/xen/xen-public/tmem.h @@ -0,0 +1,148 @@ +/****************************************************************************** + * tmem.h + * + * Guest OS interface to Xen Transcendent Memory. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_TMEM_H__ +#define __XEN_PUBLIC_TMEM_H__ + +#include "xen.h" + +/* version of ABI */ +#define TMEM_SPEC_VERSION 1 + +/* Commands to HYPERVISOR_tmem_op() */ +#define TMEM_CONTROL 0 +#define TMEM_NEW_POOL 1 +#define TMEM_DESTROY_POOL 2 +#define TMEM_NEW_PAGE 3 +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#define TMEM_READ 8 +#define TMEM_WRITE 9 +#define TMEM_XCHG 10 + +/* Privileged commands to HYPERVISOR_tmem_op() */ +#define TMEM_AUTH 101 +#define TMEM_RESTORE_NEW 102 + +/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ +#define TMEMC_THAW 0 +#define TMEMC_FREEZE 1 +#define TMEMC_FLUSH 2 +#define TMEMC_DESTROY 3 +#define TMEMC_LIST 4 +#define TMEMC_SET_WEIGHT 5 +#define TMEMC_SET_CAP 6 +#define TMEMC_SET_COMPRESS 7 +#define TMEMC_QUERY_FREEABLE_MB 8 +#define TMEMC_SAVE_BEGIN 10 +#define TMEMC_SAVE_GET_VERSION 11 +#define TMEMC_SAVE_GET_MAXPOOLS 12 +#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 +#define TMEMC_SAVE_GET_CLIENT_CAP 14 +#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 +#define TMEMC_SAVE_GET_POOL_FLAGS 16 +#define TMEMC_SAVE_GET_POOL_NPAGES 17 +#define TMEMC_SAVE_GET_POOL_UUID 18 +#define TMEMC_SAVE_GET_NEXT_PAGE 19 +#define TMEMC_SAVE_GET_NEXT_INV 20 +#define TMEMC_SAVE_END 21 +#define TMEMC_RESTORE_BEGIN 30 +#define TMEMC_RESTORE_PUT_PAGE 32 +#define TMEMC_RESTORE_FLUSH_PAGE 33 + +/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PRECOMPRESSED 4 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_POOL_PAGESIZE_MASK 0xf +#define TMEM_POOL_VERSION_SHIFT 24 +#define TMEM_POOL_VERSION_MASK 0xff +#define TMEM_POOL_RESERVED_BITS 0x00ffff00 + +/* Bits for client flags (save/restore) */ +#define TMEM_CLIENT_COMPRESS 1 +#define TMEM_CLIENT_FROZEN 2 + +/* Special errno values */ +#define EFROZEN 1000 +#define EEMPTY 1001 + + +#ifndef __ASSEMBLY__ +typedef xen_pfn_t tmem_cli_mfn_t; +typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; +struct tmem_op { + uint32_t cmd; + int32_t pool_id; + union { + struct { + uint64_t uuid[2]; + uint32_t flags; + uint32_t arg1; + } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ + struct { + uint32_t subop; + uint32_t cli_id; + uint32_t arg1; + uint32_t arg2; + uint64_t oid[3]; + tmem_cli_va_t buf; + } ctrl; /* for cmd == TMEM_CONTROL */ + struct { + + uint64_t oid[3]; + uint32_t index; + uint32_t tmem_offset; + uint32_t pfn_offset; + uint32_t len; + tmem_cli_mfn_t cmfn; /* client machine page frame */ + } gen; /* for all other cmd ("generic") */ + } u; +}; +typedef struct tmem_op tmem_op_t; +DEFINE_XEN_GUEST_HANDLE(tmem_op_t); + +struct tmem_handle { + uint32_t pool_id; + uint32_t index; + uint64_t oid[3]; +}; +#endif + +#endif /* __XEN_PUBLIC_TMEM_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/trace.h b/sys/src/9/xen/xen-public/trace.h new file mode 100644 index 000000000..e2f60a632 --- /dev/null +++ b/sys/src/9/xen/xen-public/trace.h @@ -0,0 +1,310 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +#define TRACE_EXTRA_MAX 7 +#define TRACE_EXTRA_SHIFT 28 + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_PV 0x0020f000 /* Xen PV traces */ +#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ +#define TRC_ALL 0x0ffff000 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) +#define TRC_HD_CYCLE_FLAG (1UL<<31) +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) +#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ + +#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ +#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ + +/* + * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are + * reserved for encoding what scheduler produced the information. The + * actual event is encoded in the last 9 bits. + * + * This means we have 8 scheduling IDs available (which means at most 8 + * schedulers generating events) and, in each scheduler, up to 512 + * different events. + */ +#define TRC_SCHED_ID_BITS 3 +#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS) +#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT) +#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK)) + +/* Per-scheduler IDs, to identify scheduler specific events */ +#define TRC_SCHED_CSCHED 0 +#define TRC_SCHED_CSCHED2 1 +#define TRC_SCHED_SEDF 2 +#define TRC_SCHED_ARINC653 3 + +/* Per-scheduler tracing */ +#define TRC_SCHED_CLASS_EVT(_c, _e) \ + ( ( TRC_SCHED_CLASS | \ + ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \ + (_e & TRC_SCHED_EVT_MASK) ) + +/* Trace classes for Hardware */ +#define TRC_HW_PM 0x00801000 /* Power management traces */ +#define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ + +/* Trace events per class */ +#define TRC_LOST_RECORDS (TRC_GEN + 1) +#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) +#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) + +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) +#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) +#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) +#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) +#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4) +#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5) +#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6) +#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7) +#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8) +#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9) +#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10) +#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11) +#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12) +#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) +#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) + +#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) +#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) +#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) +#define TRC_MEM_SET_P2M_ENTRY (TRC_MEM + 4) +#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5) +#define TRC_MEM_POD_POPULATE (TRC_MEM + 16) +#define TRC_MEM_POD_ZERO_RECLAIM (TRC_MEM + 17) +#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18) + +#define TRC_PV_ENTRY 0x00201000 /* Hypervisor entry points for PV guests. */ +#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */ + +#define TRC_PV_HYPERCALL (TRC_PV_ENTRY + 1) +#define TRC_PV_TRAP (TRC_PV_ENTRY + 3) +#define TRC_PV_PAGE_FAULT (TRC_PV_ENTRY + 4) +#define TRC_PV_FORCED_INVALID_OP (TRC_PV_ENTRY + 5) +#define TRC_PV_EMULATE_PRIVOP (TRC_PV_ENTRY + 6) +#define TRC_PV_EMULATE_4GB (TRC_PV_ENTRY + 7) +#define TRC_PV_MATH_STATE_RESTORE (TRC_PV_ENTRY + 8) +#define TRC_PV_PAGING_FIXUP (TRC_PV_ENTRY + 9) +#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10) +#define TRC_PV_PTWR_EMULATION (TRC_PV_ENTRY + 11) +#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV_ENTRY + 12) +#define TRC_PV_HYPERCALL_V2 (TRC_PV_ENTRY + 13) +#define TRC_PV_HYPERCALL_SUBCALL (TRC_PV_SUBCALL + 14) + +/* + * TRC_PV_HYPERCALL_V2 format + * + * Only some of the hypercall argument are recorded. Bit fields A0 to + * A5 in the first extra word are set if the argument is present and + * the arguments themselves are packed sequentially in the following + * words. + * + * The TRC_64_FLAG bit is not set for these events (even if there are + * 64-bit arguments in the record). + * + * Word + * 0 bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0 + * A5 |A4 |A3 |A2 |A1 |A0 |Hypercall op + * 1 First 32 bit (or low word of first 64 bit) arg in record + * 2 Second 32 bit (or high word of first 64 bit) arg in record + * ... + * + * A0-A5 bitfield values: + * + * 00b Argument not present + * 01b 32-bit argument present + * 10b 64-bit argument present + * 11b Reserved + */ +#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1 << (20 + 2*(i))) +#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2 << (20 + 2*(i))) +#define TRC_PV_HYPERCALL_V2_ARG_MASK (0xfff00000) + +#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1) +#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2) +#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3) +#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4) +#define TRC_SHADOW_MMIO (TRC_SHADOW + 5) +#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6) +#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7) +#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8) +#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9) +#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10) +#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11) +#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12) +#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13) +#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14) +#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) + +/* trace events per subclass */ +#define TRC_HVM_NESTEDFLAG (0x400) +#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) +#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) +#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) +#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01) +#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01) +#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02) +#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02) +#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03) +#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04) +#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05) +#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06) +#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07) +#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08) +#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08) +#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09) +#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09) +#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A) +#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B) +#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C) +#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D) +#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E) +#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F) +#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10) +#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11) +#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) +#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13) +#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) +#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) +#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) +#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17) +#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) +#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) +#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) +#define TRC_HVM_RDTSC (TRC_HVM_HANDLER + 0x1a) +#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20) +#define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21) +#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22) +#define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) +#define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) +#define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) + +#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) +#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) + +/* trace events for per class */ +#define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01) +#define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02) +#define TRC_PM_IDLE_EXIT (TRC_HW_PM + 0x03) + +/* Trace events for IRQs */ +#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1) +#define TRC_HW_IRQ_MOVE_CLEANUP (TRC_HW_IRQ + 0x2) +#define TRC_HW_IRQ_BIND_VECTOR (TRC_HW_IRQ + 0x3) +#define TRC_HW_IRQ_CLEAR_VECTOR (TRC_HW_IRQ + 0x4) +#define TRC_HW_IRQ_MOVE_FINISH (TRC_HW_IRQ + 0x5) +#define TRC_HW_IRQ_ASSIGN_VECTOR (TRC_HW_IRQ + 0x6) +#define TRC_HW_IRQ_UNMAPPED_VECTOR (TRC_HW_IRQ + 0x7) +#define TRC_HW_IRQ_HANDLED (TRC_HW_IRQ + 0x8) + +/* + * Event Flags + * + * Some events (e.g, TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple + * record formats. These event flags distinguish between the + * different formats. + */ +#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */ + +/* This structure represents a single trace buffer record. */ +struct t_rec { + uint32_t event:28; + uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */ + uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */ + union { + struct { + uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */ + uint32_t extra_u32[7]; /* event data items */ + } cycles; + struct { + uint32_t extra_u32[7]; /* event data items */ + } nocycles; + } u; +}; + +/* + * This structure contains the metadata for a single trace buffer. The head + * field, indexes into an array of struct t_rec's. + */ +struct t_buf { + /* Assume the data buffer size is X. X is generally not a power of 2. + * CONS and PROD are incremented modulo (2*X): + * 0 <= cons < 2*X + * 0 <= prod < 2*X + * This is done because addition modulo X breaks at 2^32 when X is not a + * power of 2: + * (((2^32 - 1) % X) + 1) % X != (2^32) % X + */ + uint32_t cons; /* Offset of next item to be consumed by control tools. */ + uint32_t prod; /* Offset of next item to be produced by Xen. */ + /* Records follow immediately after the meta-data header. */ +}; + +/* Structure used to pass MFNs to the trace buffers back to trace consumers. + * Offset is an offset into the mapped structure where the mfn list will be held. + * MFNs will be at ((unsigned long *)(t_info))+(t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/vcpu.h b/sys/src/9/xen/xen-public/vcpu.h new file mode 100644 index 000000000..e888daf67 --- /dev/null +++ b/sys/src/9/xen/xen-public/vcpu.h @@ -0,0 +1,240 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practise to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state). + */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address may be virtual or physical or guest handle, + * depending on the platform. Virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID informmation is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. + * This command returns -EINVAL if it is not a valid operation for this VCPU. + */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); + +#endif /* __XEN_PUBLIC_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/version.h b/sys/src/9/xen/xen-public/version.h new file mode 100644 index 000000000..44f26b0cb --- /dev/null +++ b/sys/src/9/xen/xen-public/version.h @@ -0,0 +1,96 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com> + * Copyright (c) 2005, Keir Fraser <keir@xensource.com> + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +#include "xen.h" + +/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; +typedef struct xen_compile_info xen_compile_info_t; + +#define XENVER_capabilities 3 +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + xen_ulong_t virt_start; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; +typedef struct xen_feature_info xen_feature_info_t; + +/* Declares the features reported by XENVER_get_features. */ +#include "features.h" + +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + +#endif /* __XEN_PUBLIC_VERSION_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/xen-compat.h b/sys/src/9/xen/xen-public/xen-compat.h new file mode 100644 index 000000000..69141c403 --- /dev/null +++ b/sys/src/9/xen/xen-public/xen-compat.h @@ -0,0 +1,44 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040300 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff --git a/sys/src/9/xen/xen-public/xen.h b/sys/src/9/xen/xen-public/xen.h new file mode 100644 index 000000000..fe179b9ca --- /dev/null +++ b/sys/src/9/xen/xen-public/xen.h @@ -0,0 +1,890 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include "xen-compat.h" + +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__arm__) || defined (__aarch64__) +#include "arch-arm.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +#if __XEN_INTERFACE_VERSION__ < 0x00040300 +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +#endif +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); +#endif + +/* + * HYPERCALLS + */ + +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* ` } */ + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +/* ` enum virq { */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 +/* ` } */ + +#define NR_VIRQS 24 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 + * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * There also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very dilligient in following these rules: + * 1). Start with top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there is less pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. Also if so desired the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. Meaning that + * if this is a X86_64 machine and four page table layout is used, the layout + * of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bits that does not fit with the default layout is the PAGE_PSE + * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. + * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux + * +-----+-----+----+----+----+-----+----+----+ + * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +-----+-----+----+----+----+-----+----+----+ + * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen + * +-----+-----+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 turned on, + * then we are using PAT1, if bit 3 and bit 4, then PAT2.. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means + * that if a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is + * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the + * caching as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ + +/* + * MMU EXTENDED OPERATIONS + * + * ` enum neg_errnoval + * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[], + * ` unsigned int count, + * ` unsigned int *pdone, + * ` unsigned int foreigndom) + */ +/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. + */ +/* ` enum mmuext_cmd { */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 +/* ` } */ + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; /* => enum mmuext_cmd */ + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val, + * ` enum uvm_flags flags) + * ` + * ` enum neg_errnoval + * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val, + * ` enum uvm_flags flags, + * ` domid_t domid) + * ` + * ` @va: The virtual address whose mapping we want to change + * ` @val: The new page table entry, must contain a machine address + * ` @flags: Control TLB flushes + */ +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +/* ` enum uvm_flags { */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +/* ` } */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_multicall(multicall_entry_t call_list[], + * ` unsigned int nr_calls); + * + * NB. The fields are natural register size for this architecture. + */ +struct multicall_entry { + unsigned long op, result; + unsigned long args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +/* + * Event channel endpoints per domain: + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + xen_ulong_t evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif + +/* + * `incontents 200 startofday_shared Start-of-day shared data structure + * Xen/kernel shared data -- pointer provided in start_info. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. + */ +struct shared_info { + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + + struct arch_shared_info arch; + +}; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif + +/* + * `incontents 200 startofday Start-of-day memory layout + * + * 1. The domain is started within contiguous virtual-memory region. + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base and CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 8. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + * + * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page + * table layout") a bug caused the pt_base (3.e above) and cr3 to not point + * to the start of the guest page tables (it was offset by two pages). + * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU + * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got + * allocated in the order: 'first L1','first L2', 'first L3', so the offset + * to the page table base is by two pages back. The initial domain if it is + * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the + * pages preceding pt_base and mark them as reserved/unused. + */ + +#define MAX_GUEST_CMDLINE 1024 +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen-<version>-<platform>". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ +}; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; +/* + * `incontents 200 startofday_dom0_console Dom0_console + * + * The console structure in start_info.console.dom0 + * + * This structure includes a variety of information required to + * have a working VGA/VESA console. + */ +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif + +#ifndef __ASSEMBLY__ +struct xenctl_bitmap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_bits; +}; +#endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/xencomm.h b/sys/src/9/xen/xen-public/xencomm.h new file mode 100644 index 000000000..ac45e0712 --- /dev/null +++ b/sys/src/9/xen/xen-public/xencomm.h @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 + */ + +#ifndef _XEN_XENCOMM_H_ +#define _XEN_XENCOMM_H_ + +/* A xencomm descriptor is a scatter/gather list containing physical + * addresses corresponding to a virtually contiguous memory area. The + * hypervisor translates these physical addresses to machine addresses to copy + * to and from the virtually contiguous area. + */ + +#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ +#define XENCOMM_INVALID (~0UL) + +struct xencomm_desc { + uint32_t magic; + uint32_t nr_addrs; /* the number of entries in address[] */ + uint64_t address[0]; +}; + +#endif /* _XEN_XENCOMM_H_ */ diff --git a/sys/src/9/xen/xen-public/xenoprof.h b/sys/src/9/xen/xen-public/xenoprof.h new file mode 100644 index 000000000..1c305c4e6 --- /dev/null +++ b/sys/src/9/xen/xen-public/xenoprof.h @@ -0,0 +1,152 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +#include "xen.h" + +/* + * Commands to HYPERVISOR_xenoprof_op(). + */ +#define XENOPROF_init 0 +#define XENOPROF_reset_active_list 1 +#define XENOPROF_reset_passive_list 2 +#define XENOPROF_set_active 3 +#define XENOPROF_set_passive 4 +#define XENOPROF_reserve_counters 5 +#define XENOPROF_counter 6 +#define XENOPROF_setup_events 7 +#define XENOPROF_enable_virq 8 +#define XENOPROF_start 9 +#define XENOPROF_stop 10 +#define XENOPROF_disable_virq 11 +#define XENOPROF_release_counters 12 +#define XENOPROF_shutdown 13 +#define XENOPROF_get_buffer 14 +#define XENOPROF_set_backtrace 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* PC value that indicates a special code */ +#define XENOPROF_ESCAPE_CODE (~0ULL) +/* Transient events for the xenoprof->oprofile cpu buf */ +#define XENOPROF_TRACE_BEGIN 1 + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +}; +#ifndef __XEN__ +typedef struct xenoprof_buf xenoprof_buf_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif + +struct xenoprof_init { + int32_t num_events; + int32_t is_primary; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +}; +typedef struct xenoprof_init xenoprof_init_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +}; +typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); + +struct xenoprof_counter { + uint32_t ind; + uint64_t count; + uint32_t enabled; + uint32_t event; + uint32_t hypervisor; + uint32_t kernel; + uint32_t user; + uint64_t unit_mask; +}; +typedef struct xenoprof_counter xenoprof_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); + +typedef struct xenoprof_passive { + uint16_t domain_id; + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +} xenoprof_passive_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); + +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/sys/src/9/xen/xen-public/xsm/flask_op.h b/sys/src/9/xen/xen-public/xsm/flask_op.h new file mode 100644 index 000000000..233de8120 --- /dev/null +++ b/sys/src/9/xen/xen-public/xsm/flask_op.h @@ -0,0 +1,201 @@ +/* + * This file contains the flask_op hypercall commands and definitions. + * + * Author: George Coker, <gscoker@alpha.ncsc.mil> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __FLASK_OP_H__ +#define __FLASK_OP_H__ + +#define XEN_FLASK_INTERFACE_VERSION 1 + +struct xen_flask_load { + XEN_GUEST_HANDLE(char) buffer; + uint32_t size; +}; + +struct xen_flask_setenforce { + uint32_t enforcing; +}; + +struct xen_flask_sid_context { + /* IN/OUT: sid to convert to/from string */ + uint32_t sid; + /* IN: size of the context buffer + * OUT: actual size of the output context string + */ + uint32_t size; + XEN_GUEST_HANDLE(char) context; +}; + +struct xen_flask_access { + /* IN: access request */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + uint32_t req; + /* OUT: AVC data */ + uint32_t allowed; + uint32_t audit_allow; + uint32_t audit_deny; + uint32_t seqno; +}; + +struct xen_flask_transition { + /* IN: transition SIDs and class */ + uint32_t ssid; + uint32_t tsid; + uint32_t tclass; + /* OUT: new SID */ + uint32_t newsid; +}; + +struct xen_flask_userlist { + /* IN: starting SID for list */ + uint32_t start_sid; + /* IN: size of user string and output buffer + * OUT: number of SIDs returned */ + uint32_t size; + union { + /* IN: user to enumerate SIDs */ + XEN_GUEST_HANDLE(char) user; + /* OUT: SID list */ + XEN_GUEST_HANDLE(uint32) sids; + } u; +}; + +struct xen_flask_boolean { + /* IN/OUT: numeric identifier for boolean [GET/SET] + * If -1, name will be used and bool_id will be filled in. */ + uint32_t bool_id; + /* OUT: current enforcing value of boolean [GET/SET] */ + uint8_t enforcing; + /* OUT: pending value of boolean [GET/SET] */ + uint8_t pending; + /* IN: new value of boolean [SET] */ + uint8_t new_value; + /* IN: commit new value instead of only setting pending [SET] */ + uint8_t commit; + /* IN: size of boolean name buffer [GET/SET] + * OUT: actual size of name [GET only] */ + uint32_t size; + /* IN: if bool_id is -1, used to find boolean [GET/SET] + * OUT: textual name of boolean [GET only] + */ + XEN_GUEST_HANDLE(char) name; +}; + +struct xen_flask_setavc_threshold { + /* IN */ + uint32_t threshold; +}; + +struct xen_flask_hash_stats { + /* OUT */ + uint32_t entries; + uint32_t buckets_used; + uint32_t buckets_total; + uint32_t max_chain_len; +}; + +struct xen_flask_cache_stats { + /* IN */ + uint32_t cpu; + /* OUT */ + uint32_t lookups; + uint32_t hits; + uint32_t misses; + uint32_t allocations; + uint32_t reclaims; + uint32_t frees; +}; + +struct xen_flask_ocontext { + /* IN */ + uint32_t ocon; + uint32_t sid; + uint64_t low, high; +}; + +struct xen_flask_peersid { + /* IN */ + evtchn_port_t evtchn; + /* OUT */ + uint32_t sid; +}; + +struct xen_flask_relabel { + /* IN */ + uint32_t domid; + uint32_t sid; +}; + +struct xen_flask_op { + uint32_t cmd; +#define FLASK_LOAD 1 +#define FLASK_GETENFORCE 2 +#define FLASK_SETENFORCE 3 +#define FLASK_CONTEXT_TO_SID 4 +#define FLASK_SID_TO_CONTEXT 5 +#define FLASK_ACCESS 6 +#define FLASK_CREATE 7 +#define FLASK_RELABEL 8 +#define FLASK_USER 9 +#define FLASK_POLICYVERS 10 +#define FLASK_GETBOOL 11 +#define FLASK_SETBOOL 12 +#define FLASK_COMMITBOOLS 13 +#define FLASK_MLS 14 +#define FLASK_DISABLE 15 +#define FLASK_GETAVC_THRESHOLD 16 +#define FLASK_SETAVC_THRESHOLD 17 +#define FLASK_AVC_HASHSTATS 18 +#define FLASK_AVC_CACHESTATS 19 +#define FLASK_MEMBER 20 +#define FLASK_ADD_OCONTEXT 21 +#define FLASK_DEL_OCONTEXT 22 +#define FLASK_GET_PEER_SID 23 +#define FLASK_RELABEL_DOMAIN 24 + uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */ + union { + struct xen_flask_load load; + struct xen_flask_setenforce enforce; + /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */ + struct xen_flask_sid_context sid_context; + struct xen_flask_access access; + /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */ + struct xen_flask_transition transition; + struct xen_flask_userlist userlist; + /* FLASK_GETBOOL, FLASK_SETBOOL */ + struct xen_flask_boolean boolean; + struct xen_flask_setavc_threshold setavc_threshold; + struct xen_flask_hash_stats hash_stats; + struct xen_flask_cache_stats cache_stats; + /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */ + struct xen_flask_ocontext ocontext; + struct xen_flask_peersid peersid; + struct xen_flask_relabel relabel; + } u; +}; +typedef struct xen_flask_op xen_flask_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t); + +#endif diff --git a/sys/src/9/xen/xen.s b/sys/src/9/xen/xen.s new file mode 100644 index 000000000..e409f70d9 --- /dev/null +++ b/sys/src/9/xen/xen.s @@ -0,0 +1,72 @@ +#include "xendefs.h" +#include "mem.h" + +#define ENTRY(X) TEXT X(SB), $0 + +/* + * XXX there's a race in here because we can get an upcall + * betwen the spllo() (in xenupcall) and the rti. This will make + * handlers stack, which could lead to a blown stack. Probably + * not worth fixing (but possibly worth detecting and panicing). + * + * For fun get some popcorn and turn off the lights and read the + * linux solution (search for scrit/ecrit). + */ +ENTRY(hypervisor_callback) + SUBL $8, SP /* space for ecode and trap type */ + PUSHL DS /* save DS */ + PUSHL $(KDSEL) + POPL DS /* fix up DS */ + PUSHL ES /* save ES */ + PUSHL $(KDSEL) + POPL ES /* fix up ES */ + + PUSHL FS /* save the rest of the Ureg struct */ + PUSHL GS + PUSHAL + + PUSHL SP /* Ureg* argument to trap */ + CALL xenupcall+0(SB) + POPL AX + + POPAL + POPL GS + POPL FS + POPL ES + POPL DS + ADDL $8, SP /* pop error code and trap type */ + IRETL + +/* Hypervisor uses this for application faults while it executes.*/ +ENTRY(failsafe_callback) + IRETL + PUSHL AX + CALL install_safe_pf_handler(SB) + MOVL 32(SP), BX + MOVW BX, DS + MOVL 36(SP), BX + MOVW BX, ES + MOVL 40(SP), BX + MOVW BX, FS + MOVL 44(SP), BX + MOVW BX, GS + CALL install_normal_pf_handler(SB) + POPL AX + ADDL $16, SP + IRETL + +/* xen traps with varying argument counts */ +TEXT xencall6(SB), $0 + MOVL VDI+20(FP), DI +TEXT xencall5(SB), $0 + MOVL VSI+16(FP), SI +TEXT xencall4(SB), $0 + MOVL VDX+12(FP), DX +TEXT xencall3(SB), $0 + MOVL VCX+8(FP), CX +TEXT xencall2(SB), $0 + MOVL VBX+4(FP), BX +TEXT xencall1(SB), $0 + MOVL op+0(FP), AX + INT $0x82 + RET diff --git a/sys/src/9/xen/xenbin.c b/sys/src/9/xen/xenbin.c new file mode 100644 index 000000000..1f6844e0e --- /dev/null +++ b/sys/src/9/xen/xenbin.c @@ -0,0 +1,162 @@ +/* + * Transform a Plan 9 386 bootable image to make it compatible with + * the Xen binary image loader: + * + * - pad the beginning of the text with zeroes so that the image can be loaded at + * guest 'physical' address 0 + * - insert a Xen header + * - pad the end of the text so that data segment is page-aligned in the file + * - adjust the linenumber-pc table so Plan 9 debuggers won't be confused + */ + +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> + +#define PAGE 4096 +#define PLAN9HDR 32 +#define XENHDR 32 +#define KZERO 0x80000000 +#define FLAG_VALID (1<<16) +#define FLAG_PAE (1<<14) + +void +lput(long n) +{ + char buf[sizeof(long)]; + int i; + + for (i = sizeof(long)-1; i >= 0; i--) { + buf[i] = n; + n >>= 8; + } + write(1, buf, sizeof(long)); +} + +void +rput(long n) +{ + char buf[sizeof(long)]; + int i; + + for (i = 0; i < sizeof(long); i++) { + buf[i] = n; + n >>= 8; + } + write(1, buf, sizeof(long)); +} + +void +copy(long n) +{ + char buf[PAGE]; + int m; + + while (n > 0) { + m = sizeof buf; + if (m > n) + m = n; + read(0, buf, m); + write(1, buf, m); + n -= m; + } +} + +void pad(int n) +{ + char buf[PAGE]; + int m; + + memset(buf, 0, sizeof buf); + while (n > 0) { + m = sizeof buf; + if (m > n) + m = n; + write(1, buf, m); + n -= m; + } +} + +/* + * See /sys/src/cmd/8l/span.c:/^asmlc + */ +void adjustlnpc(int v) +{ + char buf[PAGE]; + int n, s; + + n = 0; + while (v) { + s = 127; + if (v < 127) + s = v; + buf[n++] = s+128; + if (n == sizeof buf) { + write(1, buf, n); + n = 0; + } + v -= s; + } + if (n > 0) + write(1, buf, n); +} + +void +main(int argc, char **argv) +{ + Fhdr fhdr; + long newtxtsz; + long newentry; + long newlnpcsz; + long prepad, postpad; + long flags; + + flags = FLAG_VALID; + if (argc > 1 && strcmp(argv[1], "-p") == 0) + flags |= FLAG_PAE; + + crackhdr(0, &fhdr); + + newtxtsz = ((fhdr.txtsz+PLAN9HDR+PAGE-1)&~(PAGE-1)) - PLAN9HDR; + newentry = KZERO+PLAN9HDR; + prepad = fhdr.entry - newentry; + postpad = newtxtsz - fhdr.txtsz; + newtxtsz += prepad; + newlnpcsz = fhdr.lnpcsz; + if (newlnpcsz) + newlnpcsz += (prepad+126)/127; + + /* plan 9 header */ + lput(4*11*11+7); /* magic */ + lput(newtxtsz); /* sizes */ + lput(fhdr.datsz); + lput(fhdr.bsssz); + lput(fhdr.symsz); /* nsyms */ + lput(newentry); /* va of entry */ + lput(fhdr.sppcsz); /* sp offsets */ + lput(newlnpcsz); /* line offsets */ + + /* xen header */ + rput(0x336EC578); /* magic */ + rput(flags); /* flags */ + rput(-(0x336EC578+flags)); /* checksum */ + rput(newentry); /* header_addr */ + rput(KZERO); /* load_addr */ + rput(KZERO+newtxtsz+fhdr.datsz); /* load_end_addr */ + rput(KZERO+newtxtsz+fhdr.datsz+fhdr.bsssz); /* bss_end_addr */ + rput(fhdr.entry); /* entry_addr */ + + pad(prepad-XENHDR); + + seek(0, fhdr.txtoff, 0); + copy(fhdr.txtsz); + pad(postpad); + copy(fhdr.datsz); + copy(fhdr.symsz); + if (newlnpcsz) { + adjustlnpc(prepad); + copy(fhdr.lnpcsz); + } + exits(0); +} diff --git a/sys/src/9/xen/xenelf.c b/sys/src/9/xen/xenelf.c new file mode 100644 index 000000000..130d4b201 --- /dev/null +++ b/sys/src/9/xen/xenelf.c @@ -0,0 +1,157 @@ +#include <u.h> +#include <libc.h> +#include "/sys/src/libmach/elf.h" + +enum { + Page = 4096, +}; + +#define ROUND(n) ((n+Page-1)&~(Page-1)) + +Shdr isect, csect; + +static ushort +GETS(void *a) +{ + uchar *p = a; + return p[0] | p[1]<<8; +} + +static ulong +GETL(void *a) +{ + uchar *p = a; + return p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24; +} + +static void +PUTS(void *a, ushort v) +{ + uchar *p = a; + p[0] = v; + p[1] = v>>8; +} + +static void +PUTL(void *a, ulong v) +{ + uchar *p = a; + p[0] = v; + p[1] = v>>8; + p[2] = v>>16; + p[3] = v>>24; +} + +void +copy(int fin, int fout, ulong src, ulong dst, ulong size) +{ + char buf[Page]; + int n; + + seek(fin, src, 0); + seek(fout, dst, 0); + n = Page; + while (size > 0) { + if (n > size) + n = size; + read(fin, buf, n); + write(fout, buf, n); + size -= n; + } +} + +void +main(int argc, char **argv) +{ + Ehdr e; + Shdr s; + Phdr p; + int efd, ofd, ns, i, n; + ulong shoff, off, noff, size, msize; + char *sname, *sval; + + if (argc != 5) + sysfatal("Usage: xenelf input-elf-file output-elf-file section-name section-contents"); + efd = open(argv[1], OREAD); + if (efd < 0) + sysfatal("%s: %r", argv[1]); + ofd = create(argv[2], OWRITE, 0666); + if (ofd < 0) + sysfatal("%s: %r", argv[2]); + sname = argv[3]; + sval = argv[4]; + + read(efd, &e, sizeof e); + //if (e.shstrndx) + // sysfatal("section header string index already present"); + + /* page-align loadable segments in file */ + ns = GETS(&e.phnum); + shoff = GETL(&e.phoff); + noff = shoff+ns*sizeof(Phdr); + noff = ROUND(noff); + for (i = 0; i < ns; i++) { + seek(efd, shoff+i*sizeof(Phdr), 0); + read(efd, &p, sizeof p); + off = GETL(&p.offset); + PUTL(&p.offset, noff); + size = GETL(&p.filesz); + copy(efd, ofd, off, noff, size); + if (GETL(&p.type) == LOAD) { + size = ROUND(size); + PUTL(&p.filesz, size); + if ((msize = GETL(&p.memsz)) != 0 && size > msize) + PUTL(&p.memsz, size); + } else { + /* memory size for symtab segment is actually line number table size */ + msize = GETL(&p.memsz); + copy(efd, ofd, off+size, noff+size, msize); + noff += msize; + } + noff += size; + seek(ofd, shoff+i*sizeof(Phdr), 0); + write(ofd, &p, sizeof p); + } + + /* append single-entry shstrndx */ + PUTL(&isect.offset, seek(ofd, noff, 0)); + n = strlen(sname); + PUTL(&isect.size, n+2); + write(ofd, sname+n, 1); + write(ofd, sname, n+1); + + /* append comment section contents */ + PUTL(&csect.name, 1); + PUTL(&csect.offset, seek(ofd, 0, 2)); + n = strlen(sval); + PUTL(&csect.size, n+1); + write(ofd, sval, n+1); + + /* copy existing section headers to end */ + ns = 0; //GETS(&e.shnum); + shoff = GETL(&e.shoff); + PUTL(&e.shoff, seek(ofd, 0, 2)); + for (i = 0; i < ns; i++) { + seek(efd, shoff+i*sizeof(Shdr), 0); + read(efd, &s, sizeof s); + seek(ofd, 0, 2); + write(ofd, &s, sizeof s); + } + + /* append section header for comment section */ + write(ofd, &csect, sizeof csect); + ++ns; + + /* append section header for shstrndx */ + PUTS(&e.shstrndx, ns); + ++ns; + write(ofd, &isect, sizeof isect); + + /* rewrite elf header */ + PUTS(&e.shentsize, sizeof(Shdr)); + PUTS(&e.shnum, ns); + seek(ofd, 0, 0); + write(ofd, &e, sizeof e); + + exits(0); +} diff --git a/sys/src/9/xen/xengrant.c b/sys/src/9/xen/xengrant.c new file mode 100644 index 000000000..d4e5500e6 --- /dev/null +++ b/sys/src/9/xen/xengrant.c @@ -0,0 +1,100 @@ +/* + * Sharing page frames with other domains + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum { + Nframes = 1, // XXX don't increase this without setting up extra mappings in xengrant_init() +}; + +static struct { + Lock; + ushort free; + ushort *refs; +} refalloc; + +static grant_entry_t *granttab; + +void +xengrantinit(void) +{ + gnttab_setup_table_t setup; + ulong frames[Nframes]; + int nrefs, i; + + setup.dom = DOMID_SELF; + setup.nr_frames = Nframes; + set_xen_guest_handle(setup.frame_list, frames); + if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 || setup.status != 0) + panic("xen grant table setup"); + granttab = (grant_entry_t*)mmumapframe(XENGRANTTAB, frames[0]); + nrefs = Nframes * BY2PG / sizeof(grant_entry_t); + refalloc.refs = (ushort*)malloc(nrefs*sizeof(ushort)); + for (i = 0; i < nrefs; i++) + refalloc.refs[i] = i-1; + refalloc.free = nrefs-1; +} + +static int +allocref(void) +{ + int ref; + + ilock(&refalloc); + ref = refalloc.free; + if (ref > 0) + refalloc.free = refalloc.refs[ref]; + iunlock(&refalloc); + return ref; +} + +static void +freeref(int ref) +{ + ilock(&refalloc); + refalloc.refs[ref] = refalloc.free; + refalloc.free = ref; + iunlock(&refalloc); +} + +int +xengrant(domid_t domid, ulong frame, int flags) +{ + int ref; + grant_entry_t *gt; + + if ((ref = allocref()) < 0) + panic("out of xengrant refs"); + gt = &granttab[ref]; + gt->frame = frame; + gt->domid = domid; + coherence(); + gt->flags = flags; + return ref; +} + +int +xengrantend(int ref) +{ + grant_entry_t *gt; + int frame; + + gt = &granttab[ref]; + coherence(); + if (gt->flags>F_accept_transfer) { + if ((gt->flags>F_transfer_completed) == 0) + panic("xengrantend transfer in progress"); + } else { + if (gt->flags&(GTF_reading|GTF_writing)) + panic("xengrantend frame in use"); + } + coherence(); + frame = gt->frame; + gt->flags = GTF_invalid; + freeref(ref); + return frame; +} diff --git a/sys/src/9/xen/xenpcf b/sys/src/9/xen/xenpcf new file mode 100644 index 000000000..1f4074496 --- /dev/null +++ b/sys/src/9/xen/xenpcf @@ -0,0 +1,62 @@ +dev + root netif + cons + uart + arch + env + pipe + proc + mnt + srv + shr + dup + rtc + ssl + tls + cap + kprof + fs + + xenstore + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum386 inferno + + sd + +link + etherxen + ethermedium + netdevmedium + loopbackmedium + +misc + archxen + sdxen + uartxen + +ip + tcp + udp + rudp + ipifc + icmp + icmp6 + gre + ipmux + esp + il + +port + int cpuserver = 0; + +boot boot + tcp + local + +bootdir + boot$CONF.out boot + /$objtype/bin/paqfs + /$objtype/bin/auth/factotum + bootfs.paq + xenstore + diff --git a/sys/src/9/xen/xenstore.c b/sys/src/9/xen/xenstore.c new file mode 100644 index 000000000..856975903 --- /dev/null +++ b/sys/src/9/xen/xenstore.c @@ -0,0 +1,130 @@ +#include <u.h> +#include <libc.h> + +typedef ulong uint32_t; + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED +}; + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +char* +xscmd(int fd, enum xsd_sockmsg_type cmd, char *s, char *val) +{ + static char buf[512]; + struct xsd_sockmsg *msg; + char *arg; + static ulong reqid = 1; + int n; + + msg = (struct xsd_sockmsg*)buf; + arg = buf + sizeof(*msg); + if(cmd != XS_WATCH_EVENT){ + msg->type = cmd; + msg->req_id = reqid++; + msg->tx_id = 0; + msg->len = strlen(s)+1; + if (val != 0) { + msg->len += strlen(val); + if (msg->type == XS_WATCH) + msg->len++; + } + strcpy(arg, s); + if (val != 0) + strcpy(arg+strlen(s)+1, val); + if (write(fd, buf, sizeof(*msg)+msg->len) < 0) + sysfatal("write: %r"); + } + if ((n = read(fd, buf, sizeof(*msg))) != sizeof(*msg)) + sysfatal("read hdr %d: %r", n); + fprint(2, "type %lud req_id %lud len %lud\n", msg->type, msg->req_id, msg->len); + if ((n = read(fd, arg, msg->len)) != msg->len) + sysfatal("read data %d: %r", n); + if (cmd == XS_DIRECTORY || cmd == XS_WATCH_EVENT) { + for (s = arg; s < arg+msg->len; s++) { + if (*s == 0) *s = ','; + else if (*s < 32) *s += '0'; + } + } + arg[msg->len] = 0; + return arg; +} + +void +usage(void) +{ + sysfatal("Usage: xenstore [lrwdme] path [value]\n"); +} + +void +main(int argc, char *argv[]) +{ + int fd; + + if (argc != 3 && argc != 4) + usage(); + if(access("/dev/xenstore", AEXIST) < 0) + bind("#x", "/dev", MAFTER); + fd = open("/dev/xenstore", ORDWR); + if (fd < 0) + sysfatal("/dev/xenstore: %r"); + switch (argv[1][0]) { + default: + usage(); + break; + case 'r': + print("%s\n", xscmd(fd, XS_READ, argv[2], 0)); + break; + case 'l': + print("%s\n", xscmd(fd, XS_DIRECTORY, argv[2], 0)); + break; + case 'm': + print("%s\n", xscmd(fd, XS_MKDIR, argv[2], 0)); + break; + case 'd': + print("%s\n", xscmd(fd, XS_RM, argv[2], 0)); + break; + case 'w': + if (argc != 4) + usage(); + print("%s\n", xscmd(fd, XS_WRITE, argv[2], argv[3])); + break; + case 'e': + if (argc != 4) + usage(); + print("%s\n", xscmd(fd, XS_WATCH, argv[2], argv[3])); + close(fd); + fd = open("/dev/xenwatch", OREAD); + if (fd < 0) + sysfatal("/dev/xenwatch: %r"); + for (;;) + print("%s\n", xscmd(fd, XS_WATCH_EVENT, 0, 0)); + } +} diff --git a/sys/src/9/xen/xensystem.c b/sys/src/9/xen/xensystem.c new file mode 100644 index 000000000..bff81afb3 --- /dev/null +++ b/sys/src/9/xen/xensystem.c @@ -0,0 +1,541 @@ +/* + * xensystem.c + * + * TODO: we could handle mmu updates more efficiently by + * using a multicall. + * XXX perhaps we should check return values and panic on failure? + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" + +#define LOG(a) + +/* + * These functions replace all the inlines that are used on Linux systems + */ + +/* in xen.s */ +int xencall1(int op); +int xencall2(int op, ulong arg1); +int xencall3(int op, ulong arg1, ulong arg2); +int xencall4(int op, ulong arg1, ulong arg2, ulong arg3); +int xencall5(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4); +int xencall6(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5); + +int +HYPERVISOR_update_va_mapping(ulong va, uvlong newval, ulong flags) +{ + int ret; + + ret = xencall5(__HYPERVISOR_update_va_mapping, va, newval, newval>>32, flags); + if(ret < 0) + panic("update_va_mapping failed"); + return ret; +} + +long +HYPERVISOR_set_timer_op(uvlong timeout) +{ + ulong hi, lo; + + hi = timeout>>32; + lo = timeout; + return xencall3(__HYPERVISOR_set_timer_op, lo, hi); +} + +int +HYPERVISOR_set_trap_table(trap_info_t *table) +{ + return xencall2(__HYPERVISOR_set_trap_table, (ulong)table); +} + +int +HYPERVISOR_mmu_update(mmu_update_t *req, int count, + int *success_count, domid_t domid) +{ + return xencall5(__HYPERVISOR_mmu_update, (ulong)req, count, (ulong)success_count, domid); +} + +int +HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid) +{ + return xencall5(__HYPERVISOR_mmuext_op, (ulong)op, count, (ulong)scount, domid); +} + +int +HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + return xencall3(__HYPERVISOR_set_gdt, (ulong)frame_list, entries); +} + +int +HYPERVISOR_stack_switch(ulong ss, ulong esp) +{ + return xencall3(__HYPERVISOR_stack_switch, ss, esp); +} + +/* XXX match evfunc and fsfunc prototypes? */ +int +HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc) +{ + return xencall5(__HYPERVISOR_set_callbacks, evss, evfunc, fsss, fsfunc); +} + +int +HYPERVISOR_fpu_taskswitch(void) +{ + return xencall1(__HYPERVISOR_fpu_taskswitch); +} + +int +HYPERVISOR_yield(void) +{ + return xencall3(__HYPERVISOR_sched_op, SCHEDOP_yield, 0); +} + +int +HYPERVISOR_block(void) +{ + return xencall3(__HYPERVISOR_sched_op, SCHEDOP_block, 0); +} + +int +HYPERVISOR_shutdown(int reboot) +{ + sched_shutdown_t arg; + + arg.reason = reboot? SHUTDOWN_reboot : SHUTDOWN_poweroff; + return xencall3(__HYPERVISOR_sched_op, SCHEDOP_shutdown, (ulong)&arg); +} + +int +HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + return xencall3(__HYPERVISOR_multicall, (ulong)call_list, nr_calls); +} + + +int +HYPERVISOR_event_channel_op(void *op) +{ + return xencall2(__HYPERVISOR_event_channel_op, (ulong)op); +} + +int +HYPERVISOR_xen_version(int cmd, void *arg) +{ + return xencall3(__HYPERVISOR_xen_version, cmd, (ulong)arg); +} + +int +HYPERVISOR_console_io(int cmd, int count, char *str) +{ + return xencall4(__HYPERVISOR_console_io, cmd, count, (ulong)str); +} + +int +HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count) +{ + return xencall4(__HYPERVISOR_grant_table_op, cmd, (ulong)setup, count); +} + +int +HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg) +{ + return xencall3(__HYPERVISOR_memory_op, cmd, (ulong)arg); +} + +/* + * XXX this comment is leftover from old code. revisit and update. + * + * The use of 'barrier' in the following reflects their use as local-lock + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following + * critical operations are executed. All critical operatiosn must complete + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also + * includes these barriers, for example. + */ + +/* + * conversions to machine page numbers, pages and addresses + */ +#define MFN(pa) (patomfn[(pa)>>PGSHIFT]) +#define MFNPG(pa) ((uvlong)MFN(pa)<<PGSHIFT) +#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa)) +#define VA2MA(va) PA2MA(PADDR(va)) +#define VA2MFN(va) MFN(PADDR(va)) + +ulong hypervisor_virt_start; +ulong xentop; +start_info_t *xenstart; +shared_info_t *HYPERVISOR_shared_info; +ulong *patomfn; +ulong *matopfn; + +int +xenpdptpin(ulong va) +{ + struct mmuext_op op; + ulong mfn; + + mfn = MFN(PADDR(va)); + LOG(dprint("pdptpin %lux %lux\n", va, mfn);) + print("pdptpin %lux %lux\n", va, mfn); + /* mark page readonly first */ + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL); + + /* L3 here refers to page directory pointer table (PAE mode) */ + op.cmd = MMUEXT_PIN_L3_TABLE; + op.arg1.mfn = mfn; + if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0) + return 1; + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL); + return 0; +} + +int +xenpgdpin(ulong va) +{ + struct mmuext_op op; + ulong mfn; + + mfn = MFN(PADDR(va)); + LOG(dprint("pdpin %lux %lux\n", va, mfn);) + /* mark page readonly first */ + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL); + + /* to confuse you, L2 here refers to page directories */ + op.cmd = MMUEXT_PIN_L2_TABLE; + op.arg1.mfn = mfn; + if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0) + return 1; + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL); + return 0; +} + +int +xenptpin(ulong va) +{ + struct mmuext_op op; + ulong mfn; + + mfn = MFN(PADDR(va)); + LOG(dprint("pin %lux %lux\n", va, mfn);) + /* mark page readonly first */ + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL); + + /* to confuse you, L1 here refers to page tables */ + op.cmd = MMUEXT_PIN_L1_TABLE; + op.arg1.mfn = mfn; + if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0) + return 1; + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL); + return 0; +} + +void +xenptunpin(ulong va) +{ + struct mmuext_op op; + ulong mfn; + + mfn = MFN(PADDR(va)); + LOG(dprint("unpin %lux %lux\n", va, mfn);) + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = mfn; + if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0) + panic("xenptunpin va=%lux called from %lux", va, getcallerpc(&va)); + + /* mark page read-write */ + HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL); +} + +void +xenptswitch(ulong pa) +{ + struct mmuext_op op; + + op.cmd = MMUEXT_NEW_BASEPTR; + op.arg1.mfn = MFN(pa); + if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0) + panic("xenptswitch"); +} + +void +xentlbflush(void) +{ + struct mmuext_op op; + + op.cmd = MMUEXT_TLB_FLUSH_LOCAL; + HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF); +} + +/* update a pte using a machine page frame number */ +void +xenupdatema(ulong *ptr, uvlong val) +{ + mmu_update_t u; + + u.ptr = VA2MA(ptr); + u.val = val; + if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0) + panic("xenupdatema - pte %lux value %llux (was %llux) called from %lux", (ulong)ptr, val, *(uvlong*)ptr, getcallerpc(&ptr)); +} + +/* update a pte using a guest "physical" page number */ +void +xenupdate(ulong *ptr, ulong val) +{ + mmu_update_t u; + + u.ptr = VA2MA(ptr); + u.val = PA2MA(val); + if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0) + panic("xenupdate - pte %lux value %lux (%llux) called from %lux", (ulong)ptr, val, PA2MA(val), getcallerpc(&ptr)); +} + +void +acceptframe(int ref, void *va) +{ + ulong mfn; + + mfn = xengrantend(ref); + if (mfn == 0) + panic("can't accept page frame"); + LOG(dprint("acceptframe ref %d va %lux mfn %lux\n", ref, (ulong)va, mfn);) + VA2MFN(va) = mfn; + mmumapframe((ulong)va, mfn); +} + +int +donateframe(int domid, void *va) +{ + ulong mfn; + int ref; + ulong *pte; + struct xen_memory_reservation mem; + + mfn = VA2MFN(va); + ref = xengrant(domid, mfn, GTF_accept_transfer); + LOG(dprint("grant transfer %lux (%lux) -> %d\n", (ulong)va, mfn, ref);) + pte = mmuwalk(m->pdb, (ulong)va, 2, 0); + xenupdatema(pte, 0); + set_xen_guest_handle(mem.extent_start, &mfn); + mem.nr_extents = 1; + mem.extent_order = 0; + mem.address_bits = 0; + mem.domid = DOMID_SELF; + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &mem) != 1) + panic("XENMEM_decrease_reservation"); + VA2MFN(va) = ~0; + return ref; +} + +int +shareframe(int domid, void *va, int write) +{ + ulong mfn; + int ref; + int flags; + + mfn = VA2MFN(va); + flags = GTF_permit_access; + if (!write) + flags |= GTF_readonly; + ref = xengrant(domid, mfn, flags); + LOG(dprint("grant shared %lux (%lux) -> %d\n", (ulong)va, mfn, ref);) + return ref; +} + +/* + * Upcall from hypervisor, entered with evtchn_upcall_pending masked. + */ +void +xenupcall(Ureg *ureg) +{ + vcpu_info_t *vcpu; + shared_info_t *s; + ulong sel1, sel2, n1, n2, port; + + ureg->ecode = 0; + s = HYPERVISOR_shared_info; + vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; + for (;;) { + vcpu->evtchn_upcall_pending = 0; + sel1 = xchgl((uint*)&vcpu->evtchn_pending_sel, 0); + while(sel1) { + n1 = ffs(sel1); + sel1 &= ~(1<<n1); + sel2 = xchgl((uint*)&s->evtchn_pending[n1], 0); + while(sel2) { + n2 = ffs(sel2); + sel2 &= ~(1<<n2); + port = (n1<<5) + n2; + ureg->trap = 100+port; + trap(ureg); + } + } + if (vcpu->evtchn_upcall_pending) + continue; + vcpu->evtchn_upcall_mask = 0; + if (vcpu->evtchn_upcall_pending == 0) + break; + vcpu->evtchn_upcall_mask = 1; + } + +} + +/* + * tbdf field is abused to distinguish virqs from channels: + * + * tbdf=BUSUNKNOWN -> irq is a virq to be bound to a channel + * tbdf=0 -> irq is a channel number + */ +int +xenintrenable(Vctl *v) +{ + evtchn_op_t op; + uint port; + + /* XXX locking? */ + if (v->tbdf != BUSUNKNOWN) { + op.cmd = EVTCHNOP_bind_virq; + op.u.bind_virq.virq = v->irq; + op.u.bind_virq.vcpu = m->machno; + if(HYPERVISOR_event_channel_op(&op) != 0) + panic("xenintrenable: bind %d failed", v->irq); + port = op.u.bind_virq.port; + } else + port = v->irq; + if (port > 155) + return -1; + HYPERVISOR_shared_info->evtchn_mask[port/32] &= ~(1<<(port%32)); + if(0)print("xenintrenable %s: irq %d port %d mask[%d] = %#lux\n", v->name, v->irq, port, port/32, HYPERVISOR_shared_info->evtchn_mask[port/32]); + return 100+port; +} + +int +xenintrdisable(int irq) +{ + USED(irq); + panic("xenintrdisable notyet\n"); + return 0; +} + +int +xenintrvecno(int irq) +{ + return irq; +} + +int +islo(void) +{ + vcpu_info_t *cpu; + + cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared + return (cpu->evtchn_upcall_mask == 0); +} + +/* + * Note: Portable code expects spllo <= spl* <= spldone for + * accounting purposes. Lets hope the compiler doesn't reorder + * us. + */ +int +spllo(void) +{ + vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared + + if(cpu->evtchn_upcall_mask == 0) + return 0; + m->splpc = 0; + cpu->evtchn_upcall_mask = 0; + + /* + * If an event arrived while masked off, + * use a dummy call to trigger delivery + */ + if (cpu->evtchn_upcall_pending) + HYPERVISOR_xen_version(0, 0); + + return 1; +} + +int +splhi(void) +{ + ulong dummy; + vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared + int oldmask; + + oldmask = xchgb(&cpu->evtchn_upcall_mask, 1); + if (cpu->evtchn_upcall_mask != 1) + panic("xchgb"); + /* XXX ad-hoc ¨getcallerpc" because we have no arguments */ + m->splpc = (&dummy)[1]; + return oldmask; +} + +void +splx(int x) +{ + if(x) + splhi(); + else + spllo(); +} + +/* marker for profiling in portable code */ +void +spldone(void) +{ +} + +/* allocate an event channel */ +int +xenchanalloc(int dom) +{ + evtchn_op_t op; + + op.cmd = EVTCHNOP_alloc_unbound; + op.u.alloc_unbound.dom = DOMID_SELF; + op.u.alloc_unbound.remote_dom = dom; + if (HYPERVISOR_event_channel_op(&op) != 0) + panic("xenchanalloc"); + return op.u.alloc_unbound.port; +} + +/* notify over an event channel */ +void +xenchannotify(int port) +{ + evtchn_op_t op; + + op.cmd = EVTCHNOP_send; + op.u.send.port = port; + HYPERVISOR_event_channel_op(&op); +} + +void +halt(void) +{ + extern int nrdy; + + splhi(); + if (nrdy) { + spllo(); + return; + } + HYPERVISOR_block(); +} + +void +mb(void) +{ + coherence(); +} diff --git a/sys/src/9/xen/xentimer.c b/sys/src/9/xen/xentimer.c new file mode 100644 index 000000000..2a57cc5b9 --- /dev/null +++ b/sys/src/9/xen/xentimer.c @@ -0,0 +1,136 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +static vcpu_time_info_t shadow[MAX_VIRT_CPUS]; // XXX should be in Mach +static ulong wallclock; +static ulong wallclocksystime; + +/* + * Return a consistent set of time parameters. + */ +static vcpu_time_info_t * +getshadow(void) +{ + vcpu_time_info_t *s, *t; + + t = &HYPERVISOR_shared_info->vcpu_info[m->machno].time; + s = &shadow[m->machno]; // XXX place in mach struct + while(t->version != s->version) { + if (t->version&1) + continue; + s->version = t->version; + s->tsc_timestamp = t->tsc_timestamp; + s->system_time = t->system_time; + s->tsc_to_system_mul = t->tsc_to_system_mul; + s->tsc_shift = t->tsc_shift; + } + return s; +} + + +/* just get it from the shared info */ +void +guesscpuhz(int) // XXX no arg! +{ + vcpu_time_info_t *t; + + t = getshadow(); + m->cpuhz = (1000000000LL << 32) / t->tsc_to_system_mul; + if(t->tsc_shift < 0) + m->cpuhz <<= -t->tsc_shift; + else + m->cpuhz >>= t->tsc_shift; + m->cpumhz = m->cpuhz / 1000000L; +} + +void +xentimerset(uvlong next) +{ + uvlong soon; + + soon = fastticks(0) + 100000; + if (next < soon) + next = soon; + HYPERVISOR_set_timer_op(next); +} + +void +xentimerclock(Ureg* ureg, void*) +{ + + timerintr(ureg, 0); +} + +void +xentimerenable(void) +{ + intrenable(VIRQ_TIMER, xentimerclock, nil, 0, "Xen Timer"); +} + +uvlong +xentimerread(uvlong *hz) +{ + uvlong x; + uvlong delta, sdelta; + vcpu_time_info_t *t; + + t = getshadow(); + cycles(&x); + delta = x - t->tsc_timestamp; + if (t->tsc_shift < 0) + delta >>= -t->tsc_shift; + else + delta <<= t->tsc_shift; + mul64fract(&sdelta, delta, t->tsc_to_system_mul); + x = t->system_time + sdelta; + if (HYPERVISOR_shared_info->wc_sec != wallclock) { + wallclock = HYPERVISOR_shared_info->wc_sec; + wallclocksystime = x; + } + if (hz) + *hz = 1000000000; + return x; +} + +ulong +xenwallclock() +{ + ulong elapsed; + + elapsed = (ulong)((xentimerread(0) - wallclocksystime)/1000000000); + return wallclock + elapsed; +} + +void +microdelay(int microsecs) +{ + uvlong targ, hz; + + targ = xentimerread(&hz); + targ += microsecs * hz / 1000000; + while(xentimerread(0) < targ) + continue; +} + +void +delay(int millisecs) +{ + microdelay(millisecs * 1000); +} + +/* + * performance measurement ticks. must be low overhead. + * doesn't have to count over a second. + */ +ulong +perfticks(void) +{ + uvlong x; + + cycles(&x); + return x; +} |