summaryrefslogtreecommitdiff
path: root/sys/src/9/port/devaoe.c
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
committerTaru Karttunen <taruti@taruti.net>2011-03-30 15:46:40 +0300
commite5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
treed8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/9/port/devaoe.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/9/port/devaoe.c')
-rwxr-xr-xsys/src/9/port/devaoe.c2581
1 files changed, 2581 insertions, 0 deletions
diff --git a/sys/src/9/port/devaoe.c b/sys/src/9/port/devaoe.c
new file mode 100755
index 000000000..66f67f86a
--- /dev/null
+++ b/sys/src/9/port/devaoe.c
@@ -0,0 +1,2581 @@
+/*
+ * © 2005-2010 coraid
+ * aoe storage initiator
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "etherif.h"
+#include "../ip/ip.h"
+#include "../port/aoe.h"
+
+#pragma varargck argpos eventlog 1
+
+#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug);
+#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
+
+enum {
+ Maxunits = 0xff,
+ Maxframes = 128,
+ Ndevlink = 6,
+ Nea = 6,
+ Nnetlink = 6,
+};
+
+#define TYPE(q) ((ulong)(q).path & 0xf)
+#define UNIT(q) (((ulong)(q).path>>4) & 0xff)
+#define L(q) (((ulong)(q).path>>12) & 0xf)
+#define QID(u, t) ((u)<<4 | (t))
+#define Q3(l, u, t) ((l)<<8 | QID(u, t))
+#define UP(d) ((d)->flag & Dup)
+/*
+ * would like this to depend on the chan (srb).
+ * not possible in the current structure.
+ */
+#define Nofail(d, s) ((d)->flag & Dnofail)
+
+#define MS2TK(t) ((t)/MS2HZ)
+
+enum {
+ Qzero,
+ Qtopdir = 1,
+ Qtopbase,
+ Qtopctl = Qtopbase,
+ Qtoplog,
+ Qtopend,
+
+ Qunitdir,
+ Qunitbase,
+ Qctl = Qunitbase,
+ Qdata,
+ Qconfig,
+ Qident,
+
+ Qdevlinkdir,
+ Qdevlinkbase,
+ Qdevlink = Qdevlinkbase,
+ Qdevlinkend,
+
+ Qtopfiles = Qtopend-Qtopbase,
+ Qdevlinkfiles = Qdevlinkend-Qdevlinkbase,
+
+ Eventlen = 256,
+ Nevents = 64, /* must be power of 2 */
+
+ Fread = 0,
+ Fwrite,
+ Tfree = -1,
+ Tmgmt,
+
+ /*
+ * round trip bounds, timeouts, in ticks.
+ * timeouts should be long enough that rebooting
+ * the coraid (which usually takes under two minutes)
+ * doesn't trigger a timeout.
+ */
+ Rtmax = MS2TK(320),
+ Rtmin = MS2TK(20),
+ Maxreqticks = 4*60*HZ, /* was 45*HZ */
+
+ Dbcnt = 1024,
+
+ Crd = 0x20,
+ Crdext = 0x24,
+ Cwr = 0x30,
+ Cwrext = 0x34,
+ Cid = 0xec,
+};
+
+enum {
+ Read,
+ Write,
+};
+
+/*
+ * unified set of flags
+ * a Netlink + Aoedev most both be jumbo capable
+ * to send jumbograms to that interface.
+ */
+enum {
+ /* sync with ahci.h */
+ Dllba = 1<<0,
+ Dsmart = 1<<1,
+ Dpower = 1<<2,
+ Dnop = 1<<3,
+ Datapi = 1<<4,
+ Datapi16= 1<<5,
+
+ /* aoe specific */
+ Dup = 1<<6,
+ Djumbo = 1<<7,
+ Dnofail = 1<<8,
+};
+
+static char *flagname[] = {
+ "llba",
+ "smart",
+ "power",
+ "nop",
+ "atapi",
+ "atapi16",
+
+ "up",
+ "jumbo",
+ "nofail",
+};
+
+typedef struct {
+ ushort flag;
+ uint lostjumbo;
+ int datamtu;
+
+ Chan *cc;
+ Chan *dc;
+ Chan *mtu; /* open early to prevent bind issues. */
+ char path[Maxpath];
+ uchar ea[Eaddrlen];
+} Netlink;
+
+typedef struct {
+ Netlink *nl;
+ int nea;
+ ulong eaidx;
+ uchar eatab[Nea][Eaddrlen];
+ ulong npkt;
+ ulong resent;
+ ushort flag;
+
+ ulong rttavg;
+ ulong mintimer;
+} Devlink;
+
+typedef struct Srb Srb;
+struct Srb {
+ Rendez;
+ Srb *next;
+ ulong ticksent;
+ ulong len;
+ vlong sector;
+ short write;
+ short nout;
+ char *error;
+ void *dp;
+ void *data;
+};
+
+typedef struct {
+ int tag;
+ ulong bcnt;
+ ulong dlen;
+ vlong lba;
+ ulong ticksent;
+ int nhdr;
+ uchar hdr[ETHERMINTU];
+ void *dp;
+ Devlink *dl;
+ Netlink *nl;
+ int eaidx;
+ Srb *srb;
+} Frame;
+
+typedef struct Aoedev Aoedev;
+struct Aoedev {
+ QLock;
+ Aoedev *next;
+
+ ulong vers;
+
+ int ndl;
+ ulong dlidx;
+ Devlink *dl;
+ Devlink dltab[Ndevlink];
+
+ ushort fwver;
+ ushort flag;
+ int nopen;
+ int major;
+ int minor;
+ int unit;
+ int lasttag;
+ int nframes;
+ Frame *frames;
+ vlong bsize;
+ vlong realbsize;
+
+ uint maxbcnt;
+ ushort nout;
+ ushort maxout;
+ ulong lastwadj;
+ Srb *head;
+ Srb *tail;
+ Srb *inprocess;
+
+ /* magic numbers 'R' us */
+ char serial[20+1];
+ char firmware[8+1];
+ char model[40+1];
+ int nconfig;
+ uchar config[1024];
+ uchar ident[512];
+};
+
+#pragma varargck type "æ" Aoedev*
+
+static struct {
+ Lock;
+ QLock;
+ Rendez;
+ char buf[Eventlen*Nevents];
+ char *rp;
+ char *wp;
+} events;
+
+static struct {
+ RWlock;
+ int nd;
+ Aoedev *d;
+} devs;
+
+static struct {
+ Lock;
+ int reader[Nnetlink]; /* reader is running. */
+ Rendez rendez[Nnetlink]; /* confirm exit. */
+ Netlink nl[Nnetlink];
+} netlinks;
+
+extern Dev aoedevtab;
+static Ref units;
+static Ref drivevers;
+static int debug;
+static int autodiscover = 1;
+static int rediscover;
+
+char Enotup[] = "aoe device is down";
+char Echange[] = "media or partition has changed";
+
+static Srb*
+srballoc(ulong sz)
+{
+ Srb *srb;
+
+ srb = malloc(sizeof *srb+sz);
+ srb->dp = srb->data = srb+1;
+ srb->ticksent = MACHP(0)->ticks;
+ return srb;
+}
+
+static Srb*
+srbkalloc(void *db, ulong)
+{
+ Srb *srb;
+
+ srb = malloc(sizeof *srb);
+ srb->dp = srb->data = db;
+ srb->ticksent = MACHP(0)->ticks;
+ return srb;
+}
+
+#define srbfree(srb) free(srb)
+
+static void
+srberror(Srb *srb, char *s)
+{
+ srb->error = s;
+ srb->nout--;
+ if (srb->nout == 0)
+ wakeup(srb);
+}
+
+static void
+frameerror(Aoedev *d, Frame *f, char *s)
+{
+ Srb *srb;
+
+ srb = f->srb;
+ if(f->tag == Tfree || !srb)
+ return;
+ f->srb = nil;
+ f->tag = Tfree; /* don't get fooled by way-slow responses */
+ srberror(srb, s);
+ d->nout--;
+}
+
+static char*
+unitname(Aoedev *d)
+{
+ uprint("%d.%d", d->major, d->minor);
+ return up->genbuf;
+}
+
+static int
+eventlogready(void*)
+{
+ return *events.rp;
+}
+
+static long
+eventlogread(void *a, long n)
+{
+ int len;
+ char *p, *buf;
+
+ buf = smalloc(Eventlen);
+ qlock(&events);
+ lock(&events);
+ p = events.rp;
+ len = *p;
+ if(len == 0){
+ n = 0;
+ unlock(&events);
+ } else {
+ if(n > len)
+ n = len;
+ /* can't move directly into pageable space with events lock held */
+ memmove(buf, p+1, n);
+ *p = 0;
+ events.rp = p += Eventlen;
+ if(p >= events.buf + sizeof events.buf)
+ events.rp = events.buf;
+ unlock(&events);
+
+ /* the concern here is page faults in memmove below */
+ if(waserror()){
+ free(buf);
+ qunlock(&events);
+ nexterror();
+ }
+ memmove(a, buf, n);
+ poperror();
+ }
+ free(buf);
+ qunlock(&events);
+ return n;
+}
+
+static int
+eventlog(char *fmt, ...)
+{
+ int dragrp, n;
+ char *p;
+ va_list arg;
+
+ lock(&events);
+ p = events.wp;
+ dragrp = *p++;
+ va_start(arg, fmt);
+ n = vsnprint(p, Eventlen-1, fmt, arg);
+ *--p = n;
+ p = events.wp += Eventlen;
+ if(p >= events.buf + sizeof events.buf)
+ p = events.wp = events.buf;
+ if(dragrp)
+ events.rp = p;
+ unlock(&events);
+ wakeup(&events);
+ return n;
+}
+
+static int
+eventcount(void)
+{
+ int n;
+
+ lock(&events);
+ if(*events.rp == 0)
+ n = 0;
+ else
+ n = (events.wp - events.rp) & (Nevents - 1);
+ unlock(&events);
+ return n/Eventlen;
+}
+
+static int
+tsince(int tag)
+{
+ int n;
+
+ n = MACHP(0)->ticks & 0xffff;
+ n -= tag & 0xffff;
+ if(n < 0)
+ n += 1<<16;
+ return n;
+}
+
+static int
+newtag(Aoedev *d)
+{
+ int t;
+
+ do {
+ t = ++d->lasttag << 16;
+ t |= MACHP(0)->ticks & 0xffff;
+ } while (t == Tfree || t == Tmgmt);
+ return t;
+}
+
+static void
+downdev(Aoedev *d, char *err)
+{
+ Frame *f, *e;
+
+ d->flag &= ~Dup;
+ f = d->frames;
+ e = f + d->nframes;
+ for(; f < e; f->tag = Tfree, f->srb = nil, f++)
+ frameerror(d, f, Enotup);
+ d->inprocess = nil;
+ eventlog("%æ: removed; %s\n", d, err);
+}
+
+static Block*
+allocfb(Frame *f)
+{
+ int len;
+ Block *b;
+
+ len = f->nhdr + f->dlen;
+ if(len < ETHERMINTU)
+ len = ETHERMINTU;
+ b = allocb(len);
+ memmove(b->wp, f->hdr, f->nhdr);
+ if(f->dlen)
+ memmove(b->wp + f->nhdr, f->dp, f->dlen);
+ b->wp += len;
+ return b;
+}
+
+static void
+putlba(Aoeata *a, vlong lba)
+{
+ uchar *c;
+
+ c = a->lba;
+ c[0] = lba;
+ c[1] = lba >> 8;
+ c[2] = lba >> 16;
+ c[3] = lba >> 24;
+ c[4] = lba >> 32;
+ c[5] = lba >> 40;
+}
+
+static Devlink*
+pickdevlink(Aoedev *d)
+{
+ ulong i, n;
+ Devlink *l;
+
+ for(i = 0; i < d->ndl; i++){
+ n = d->dlidx++ % d->ndl;
+ l = d->dl + n;
+ if(l && l->flag & Dup)
+ return l;
+ }
+ return 0;
+}
+
+static int
+pickea(Devlink *l)
+{
+ if(l == 0)
+ return -1;
+ if(l->nea == 0)
+ return -1;
+ return l->eaidx++ % l->nea;
+}
+
+static int
+hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
+{
+ int i;
+ Devlink *l;
+
+ if(f->srb && MACHP(0)->ticks - f->srb->ticksent > Maxreqticks){
+ eventlog("%æ: srb timeout\n", d);
+ if(cmd == ACata && f->srb && Nofail(d, s))
+ f->srb->ticksent = MACHP(0)->ticks;
+ else
+ frameerror(d, f, Etimedout);
+ return -1;
+ }
+ l = pickdevlink(d);
+ i = pickea(l);
+ if(i == -1){
+ if(cmd != ACata || f->srb == nil || !Nofail(d, s))
+ downdev(d, "resend fails; no netlink/ea");
+ return -1;
+ }
+ memmove(h->dst, l->eatab[i], Eaddrlen);
+ memmove(h->src, l->nl->ea, sizeof h->src);
+ hnputs(h->type, Aoetype);
+ h->verflag = Aoever << 4;
+ h->error = 0;
+ hnputs(h->major, d->major);
+ h->minor = d->minor;
+ h->cmd = cmd;
+
+ hnputl(h->tag, f->tag = newtag(d));
+ f->dl = l;
+ f->nl = l->nl;
+ f->eaidx = i;
+ f->ticksent = MACHP(0)->ticks;
+
+ return f->tag;
+}
+
+static int
+resend(Aoedev *d, Frame *f)
+{
+ ulong n;
+ Aoeata *a;
+
+ a = (Aoeata*)f->hdr;
+ if(hset(d, f, a, a->cmd) == -1)
+ return -1;
+ n = f->bcnt;
+ if(n > d->maxbcnt){
+ n = d->maxbcnt; /* mtu mismatch (jumbo fail?) */
+ if(f->dlen > n)
+ f->dlen = n;
+ }
+ a->scnt = n / Aoesectsz;
+ f->dl->resent++;
+ f->dl->npkt++;
+ if(waserror())
+ return -1;
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+ poperror();
+ return 0;
+}
+
+static void
+discover(int major, int minor)
+{
+ Aoehdr *h;
+ Block *b;
+ Netlink *nl, *e;
+
+ nl = netlinks.nl;
+ e = nl + nelem(netlinks.nl);
+ for(; nl < e; nl++){
+ if(nl->cc == nil)
+ continue;
+ b = allocb(ETHERMINTU);
+ if(waserror()){
+ freeb(b);
+ nexterror();
+ }
+ b->wp = b->rp + ETHERMINTU;
+ memset(b->rp, 0, ETHERMINTU);
+ h = (Aoehdr*)b->rp;
+ memset(h->dst, 0xff, sizeof h->dst);
+ memmove(h->src, nl->ea, sizeof h->src);
+ hnputs(h->type, Aoetype);
+ h->verflag = Aoever << 4;
+ hnputs(h->major, major);
+ h->minor = minor;
+ h->cmd = ACconfig;
+ poperror();
+ /* send b down the queue */
+ devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
+ }
+}
+
+/*
+ * Check all frames on device and resend any frames that have been
+ * outstanding for 200% of the device round trip time average.
+ */
+static void
+aoesweepproc(void*)
+{
+ ulong i, tx, timeout, nbc;
+ vlong starttick;
+ enum { Nms = 100, Nbcms = 30*1000, }; /* magic */
+ uchar *ea;
+ Aoeata *a;
+ Aoedev *d;
+ Devlink *l;
+ Frame *f, *e;
+
+ nbc = Nbcms/Nms;
+loop:
+ if(nbc-- == 0){
+ if(rediscover && !waserror()){
+ discover(0xffff, 0xff);
+ poperror();
+ }
+ nbc = Nbcms/Nms;
+ }
+ starttick = MACHP(0)->ticks;
+ rlock(&devs);
+ for(d = devs.d; d; d = d->next){
+ if(!canqlock(d))
+ continue;
+ if(!UP(d)){
+ qunlock(d);
+ continue;
+ }
+ tx = 0;
+ f = d->frames;
+ e = f + d->nframes;
+ for (; f < e; f++){
+ if(f->tag == Tfree)
+ continue;
+ l = f->dl;
+ timeout = l->rttavg << 1;
+ i = tsince(f->tag);
+ if(i < timeout)
+ continue;
+ if(d->nout == d->maxout){
+ if(d->maxout > 1)
+ d->maxout--;
+ d->lastwadj = MACHP(0)->ticks;
+ }
+ a = (Aoeata*)f->hdr;
+ if(a->scnt > Dbcnt / Aoesectsz &&
+ ++f->nl->lostjumbo > (d->nframes << 1)){
+ ea = f->dl->eatab[f->eaidx];
+ eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
+ d, f->nl->path, ea, f->lba);
+ d->maxbcnt = Dbcnt;
+ d->flag &= ~Djumbo;
+ }
+ resend(d, f);
+ if(tx++ == 0){
+ if((l->rttavg <<= 1) > Rtmax)
+ l->rttavg = Rtmax;
+ eventlog("%æ: rtt %ldms\n", d, TK2MS(l->rttavg));
+ }
+ }
+ if(d->nout == d->maxout && d->maxout < d->nframes &&
+ TK2MS(MACHP(0)->ticks - d->lastwadj) > 10*1000){ /* more magic */
+ d->maxout++;
+ d->lastwadj = MACHP(0)->ticks;
+ }
+ qunlock(d);
+ }
+ runlock(&devs);
+ i = Nms - TK2MS(MACHP(0)->ticks - starttick);
+ if(i > 0)
+ tsleep(&up->sleep, return0, 0, i);
+ goto loop;
+}
+
+static int
+fmtæ(Fmt *f)
+{
+ char buf[16];
+ Aoedev *d;
+
+ d = va_arg(f->args, Aoedev*);
+ snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
+ return fmtstrcpy(f, buf);
+}
+
+static void netbind(char *path);
+
+static void
+aoecfg(void)
+{
+ int n, i;
+ char *p, *f[32], buf[24];
+
+ if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
+ return;
+ /* goo! */
+ for(i = 0; i < n; i++){
+ p = f[i];
+ if(strncmp(p, "ether", 5) == 0)
+ snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
+ else if(strncmp(p, "#l", 2) == 0)
+ snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
+ else
+ continue;
+ if(!waserror()){
+ netbind(buf);
+ poperror();
+ }
+ }
+}
+
+static void
+aoeinit(void)
+{
+ static int init;
+ static QLock l;
+
+ if(!canqlock(&l))
+ return;
+ if(init == 0){
+ fmtinstall(L'æ', fmtæ);
+ events.rp = events.wp = events.buf;
+ kproc("aoesweep", aoesweepproc, nil);
+ aoecfg();
+ init = 1;
+ }
+ qunlock(&l);
+}
+
+static Chan*
+aoeattach(char *spec)
+{
+ Chan *c;
+
+ if(*spec)
+ error(Enonexist);
+ aoeinit();
+ c = devattach(L'æ', spec);
+ mkqid(&c->qid, Qzero, 0, QTDIR);
+ return c;
+}
+
+static Aoedev*
+unit2dev(ulong unit)
+{
+ int i;
+ Aoedev *d;
+
+ rlock(&devs);
+ i = 0;
+ for(d = devs.d; d; d = d->next)
+ if(i++ == unit){
+ runlock(&devs);
+ return d;
+ }
+ runlock(&devs);
+ uprint("unit lookup failure: %lux pc %#p", unit, getcallerpc(&unit));
+ error(up->genbuf);
+ return nil;
+}
+
+static int
+unitgen(Chan *c, ulong type, Dir *dp)
+{
+ int perm, t;
+ ulong vers;
+ vlong size;
+ char *p;
+ Aoedev *d;
+ Qid q;
+
+ d = unit2dev(UNIT(c->qid));
+ perm = 0644;
+ size = 0;
+ vers = d->vers;
+ t = QTFILE;
+
+ switch(type){
+ default:
+ return -1;
+ case Qctl:
+ p = "ctl";
+ break;
+ case Qdata:
+ p = "data";
+ perm = 0640;
+ if(UP(d))
+ size = d->bsize;
+ break;
+ case Qconfig:
+ p = "config";
+ if(UP(d))
+ size = d->nconfig;
+ break;
+ case Qident:
+ p = "ident";
+ if(UP(d))
+ size = sizeof d->ident;
+ break;
+ case Qdevlinkdir:
+ p = "devlink";
+ t = QTDIR;
+ perm = 0555;
+ break;
+ }
+ mkqid(&q, QID(UNIT(c->qid), type), vers, t);
+ devdir(c, q, p, size, eve, perm, dp);
+ return 1;
+}
+
+static int
+topgen(Chan *c, ulong type, Dir *d)
+{
+ int perm;
+ vlong size;
+ char *p;
+ Qid q;
+
+ perm = 0444;
+ size = 0;
+ switch(type){
+ default:
+ return -1;
+ case Qtopctl:
+ p = "ctl";
+ perm = 0644;
+ break;
+ case Qtoplog:
+ p = "log";
+ size = eventcount();
+ break;
+ }
+ mkqid(&q, type, 0, QTFILE);
+ devdir(c, q, p, size, eve, perm, d);
+ return 1;
+}
+
+static int
+aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
+{
+ int i;
+ Aoedev *d;
+ Qid q;
+
+ if(c->qid.path == 0){
+ switch(s){
+ case DEVDOTDOT:
+ q.path = 0;
+ q.type = QTDIR;
+ devdir(c, q, "#æ", 0, eve, 0555, dp);
+ break;
+ case 0:
+ q.path = Qtopdir;
+ q.type = QTDIR;
+ devdir(c, q, "aoe", 0, eve, 0555, dp);
+ break;
+ default:
+ return -1;
+ }
+ return 1;
+ }
+
+ switch(TYPE(c->qid)){
+ default:
+ return -1;
+ case Qtopdir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, Qzero, 0, QTDIR);
+ devdir(c, q, "aoe", 0, eve, 0555, dp);
+ return 1;
+ }
+ if(s < Qtopfiles)
+ return topgen(c, Qtopbase + s, dp);
+ s -= Qtopfiles;
+ if(s >= units.ref)
+ return -1;
+ mkqid(&q, QID(s, Qunitdir), 0, QTDIR);
+ d = unit2dev(s);
+ assert(d != nil);
+ devdir(c, q, unitname(d), 0, eve, 0555, dp);
+ return 1;
+ case Qtopctl:
+ case Qtoplog:
+ return topgen(c, TYPE(c->qid), dp);
+ case Qunitdir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
+ uprint("%uld", UNIT(c->qid));
+ devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+ return 1;
+ }
+ return unitgen(c, Qunitbase+s, dp);
+ case Qctl:
+ case Qdata:
+ case Qconfig:
+ case Qident:
+ return unitgen(c, TYPE(c->qid), dp);
+ case Qdevlinkdir:
+ i = UNIT(c->qid);
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
+ devdir(c, q, "devlink", 0, eve, 0555, dp);
+ return 1;
+ }
+ if(i >= units.ref)
+ return -1;
+ d = unit2dev(i);
+ if(s >= d->ndl)
+ return -1;
+ uprint("%d", s);
+ mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
+ devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+ return 1;
+ case Qdevlink:
+ uprint("%d", s);
+ mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
+ devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+ return 1;
+ }
+}
+
+static Walkqid*
+aoewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+ return devwalk(c, nc, name, nname, nil, 0, aoegen);
+}
+
+static int
+aoestat(Chan *c, uchar *db, int n)
+{
+ return devstat(c, db, n, nil, 0, aoegen);
+}
+
+static Chan*
+aoeopen(Chan *c, int omode)
+{
+ Aoedev *d;
+
+ if(TYPE(c->qid) != Qdata)
+ return devopen(c, omode, 0, 0, aoegen);
+
+ d = unit2dev(UNIT(c->qid));
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ nexterror();
+ }
+ if(!UP(d))
+ error(Enotup);
+ c = devopen(c, omode, 0, 0, aoegen);
+ d->nopen++;
+ poperror();
+ qunlock(d);
+ return c;
+}
+
+static void
+aoeclose(Chan *c)
+{
+ Aoedev *d;
+
+ if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
+ return;
+
+ d = unit2dev(UNIT(c->qid));
+ qlock(d);
+ if(--d->nopen == 0 && !waserror()){
+ discover(d->major, d->minor);
+ poperror();
+ }
+ qunlock(d);
+}
+
+static void
+atarw(Aoedev *d, Frame *f)
+{
+ ulong bcnt;
+ char extbit, writebit;
+ Aoeata *ah;
+ Srb *srb;
+
+ extbit = 0x4;
+ writebit = 0x10;
+
+ srb = d->inprocess;
+ bcnt = d->maxbcnt;
+ if(bcnt > srb->len)
+ bcnt = srb->len;
+ f->nhdr = AOEATASZ;
+ memset(f->hdr, 0, f->nhdr);
+ ah = (Aoeata*)f->hdr;
+ if(hset(d, f, ah, ACata) == -1) {
+ d->inprocess = nil;
+ return;
+ }
+ f->dp = srb->dp;
+ f->bcnt = bcnt;
+ f->lba = srb->sector;
+ f->srb = srb;
+
+ ah->scnt = bcnt / Aoesectsz;
+ putlba(ah, f->lba);
+ if(d->flag & Dllba)
+ ah->aflag |= AAFext;
+ else {
+ extbit = 0;
+ ah->lba[3] &= 0x0f;
+ ah->lba[3] |= 0xe0; /* LBA bit+obsolete 0xa0 */
+ }
+ if(srb->write){
+ ah->aflag |= AAFwrite;
+ f->dlen = bcnt;
+ }else{
+ writebit = 0;
+ f->dlen = 0;
+ }
+ ah->cmdstat = 0x20 | writebit | extbit;
+
+ /* mark tracking fields and load out */
+ srb->nout++;
+ srb->dp = (uchar*)srb->dp + bcnt;
+ srb->len -= bcnt;
+ srb->sector += bcnt / Aoesectsz;
+ if(srb->len == 0)
+ d->inprocess = nil;
+ d->nout++;
+ f->dl->npkt++;
+ if(waserror()){
+ f->tag = Tfree;
+ d->inprocess = nil;
+ nexterror();
+ }
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+ poperror();
+}
+
+static char*
+aoeerror(Aoehdr *h)
+{
+ int n;
+ static char *errs[] = {
+ "aoe protocol error: unknown",
+ "aoe protocol error: bad command code",
+ "aoe protocol error: bad argument param",
+ "aoe protocol error: device unavailable",
+ "aoe protocol error: config string present",
+ "aoe protocol error: unsupported version",
+ "aoe protocol error: target is reserved",
+ };
+
+ if((h->verflag & AFerr) == 0)
+ return 0;
+ n = h->error;
+ if(n > nelem(errs))
+ n = 0;
+ return errs[n];
+}
+
+static void
+rtupdate(Devlink *l, int rtt)
+{
+ int n;
+
+ n = rtt;
+ if(rtt < 0){
+ n = -rtt;
+ if(n < Rtmin)
+ n = Rtmin;
+ else if(n > Rtmax)
+ n = Rtmax;
+ l->mintimer += (n - l->mintimer) >> 1;
+ } else if(n < l->mintimer)
+ n = l->mintimer;
+ else if(n > Rtmax)
+ n = Rtmax;
+
+ /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
+ n -= l->rttavg;
+ l->rttavg += n >> 2;
+}
+
+static int
+srbready(void *v)
+{
+ Srb *s;
+
+ s = v;
+ return s->error || (!s->nout && !s->len);
+}
+
+static Frame*
+getframe(Aoedev *d, int tag)
+{
+ Frame *f, *e;
+
+ f = d->frames;
+ e = f + d->nframes;
+ for(; f < e; f++)
+ if(f->tag == tag)
+ return f;
+ return nil;
+}
+
+static Frame*
+freeframe(Aoedev *d)
+{
+ if(d->nout < d->maxout)
+ return getframe(d, Tfree);
+ return nil;
+}
+
+static void
+work(Aoedev *d)
+{
+ Frame *f;
+
+ while ((f = freeframe(d)) != nil) {
+ if(d->inprocess == nil){
+ if(d->head == nil)
+ return;
+ d->inprocess = d->head;
+ d->head = d->head->next;
+ if(d->head == nil)
+ d->tail = nil;
+ }
+ atarw(d, f);
+ }
+}
+
+static void
+strategy(Aoedev *d, Srb *srb)
+{
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ nexterror();
+ }
+ srb->next = nil;
+ if(d->tail)
+ d->tail->next = srb;
+ d->tail = srb;
+ if(d->head == nil)
+ d->head = srb;
+ work(d);
+ poperror();
+ qunlock(d);
+
+ while(waserror())
+ ;
+ sleep(srb, srbready, srb);
+ poperror();
+}
+
+#define iskaddr(a) ((uintptr)(a) > KZERO)
+
+static long
+rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
+{
+ long n, nlen, copy;
+ enum { Srbsz = 1<<19, }; /* magic allocation */
+ Srb *srb;
+
+ if((off|len) & (Aoesectsz-1))
+ error("offset and length must be sector multiple.\n");
+ if(off > d->bsize || len == 0)
+ return 0;
+ if(off + len > d->bsize)
+ len = d->bsize - off;
+ copy = 0;
+ if(iskaddr(db)){
+ srb = srbkalloc(db, len);
+ copy = 1;
+ }else
+ srb = srballoc(Srbsz <= len? Srbsz: len);
+ if(waserror()){
+ srbfree(srb);
+ nexterror();
+ }
+ nlen = len;
+ srb->write = write;
+ do {
+ if(!UP(d))
+ error(Eio);
+ srb->sector = off / Aoesectsz;
+ srb->dp = srb->data;
+ n = nlen;
+ if(n > Srbsz)
+ n = Srbsz;
+ srb->len = n;
+ if(write && !copy)
+ memmove(srb->data, db, n);
+ strategy(d, srb);
+ if(srb->error)
+ error(srb->error);
+ if(!write && !copy)
+ memmove(db, srb->data, n);
+ nlen -= n;
+ db += n;
+ off += n;
+ } while (nlen > 0);
+ poperror();
+ srbfree(srb);
+ return len;
+}
+
+static long
+readmem(ulong off, void *dst, long n, void *src, long size)
+{
+ if(off >= size)
+ return 0;
+ if(off + n > size)
+ n = size - off;
+ memmove(dst, (uchar*)src + off, n);
+ return n;
+}
+
+static char *
+pflag(char *s, char *e, uchar f)
+{
+ uchar i, m;
+
+ for(i = 0; i < 8; i++){
+ m = 1 << i;
+ if(f & m)
+ s = seprint(s, e, "%s ", flagname[i]? flagname[i]: "oops");
+ }
+ return seprint(s, e, "\n");
+}
+
+static int
+pstat(Aoedev *d, char *db, int len, int off)
+{
+ int i;
+ char *state, *s, *p, *e;
+
+ s = p = malloc(READSTR);
+ e = p + READSTR;
+
+ state = "down";
+ if(UP(d))
+ state = "up";
+
+ p = seprint(p, e,
+ "state: %s\n" "nopen: %d\n" "nout: %d\n"
+ "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d\n"
+ "fw: %.4ux\n"
+ "model: %s\n" "serial: %s\n" "firmware: %s\n",
+ state, d->nopen, d->nout,
+ d->maxout, d->nframes, d->maxbcnt,
+ d->fwver,
+ d->model, d->serial, d->firmware);
+ p = seprint(p, e, "flag: ");
+ p = pflag(p, e, d->flag);
+
+ if(p - s < len)
+ len = p - s;
+ i = readstr(off, db, len, s);
+ free(s);
+ return i;
+}
+
+static long
+unitread(Chan *c, void *db, long len, vlong off)
+{
+ Aoedev *d;
+
+ d = unit2dev(UNIT(c->qid));
+ if(d->vers != c->qid.vers)
+ error(Echange);
+ switch(TYPE(c->qid)){
+ default:
+ error(Ebadarg);
+ case Qctl:
+ return pstat(d, db, len, off);
+ case Qdata:
+ return rw(d, Read, db, len, off);
+ case Qconfig:
+ if (!UP(d))
+ error(Enotup);
+ return readmem(off, db, len, d->config, d->nconfig);
+ case Qident:
+ if (!UP(d))
+ error(Enotup);
+ return readmem(off, db, len, d->ident, sizeof d->ident);
+ }
+}
+
+static int
+devlinkread(Chan *c, void *db, int len, int off)
+{
+ int i;
+ char *s, *p, *e;
+ Aoedev *d;
+ Devlink *l;
+
+ d = unit2dev(UNIT(c->qid));
+ i = L(c->qid);
+ if(i >= d->ndl)
+ return 0;
+ l = d->dl + i;
+
+ s = p = malloc(READSTR);
+ e = s + READSTR;
+
+ p = seprint(p, e, "addr: ");
+ for(i = 0; i < l->nea; i++)
+ p = seprint(p, e, "%E ", l->eatab[i]);
+ p = seprint(p, e, "\n");
+ p = seprint(p, e, "npkt: %uld\n", l->npkt);
+ p = seprint(p, e, "resent: %uld\n", l->resent);
+ p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
+ p = seprint(p, e, "rttavg: %uld\n", TK2MS(l->rttavg));
+ p = seprint(p, e, "mintimer: %uld\n", TK2MS(l->mintimer));
+
+ p = seprint(p, e, "nl path: %s\n", l->nl->path);
+ p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
+ p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->flag);
+ p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+ p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+ if(p - s < len)
+ len = p - s;
+ i = readstr(off, db, len, s);
+ free(s);
+ return i;
+}
+
+static long
+topctlread(Chan *, void *db, int len, int off)
+{
+ int i;
+ char *s, *p, *e;
+ Netlink *n;
+
+ s = p = malloc(READSTR);
+ e = s + READSTR;
+
+ p = seprint(p, e, "debug: %d\n", debug);
+ p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+ p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+ for(i = 0; i < Nnetlink; i++){
+ n = netlinks.nl+i;
+ if(n->cc == 0)
+ continue;
+ p = seprint(p, e, "if%d path: %s\n", i, n->path);
+ p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+ p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+ p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+ p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
+ }
+
+ if(p - s < len)
+ len = p - s;
+ i = readstr(off, db, len, s);
+ free(s);
+ return i;
+}
+
+static long
+aoeread(Chan *c, void *db, long n, vlong off)
+{
+ switch(TYPE(c->qid)){
+ default:
+ error(Eperm);
+ case Qzero:
+ case Qtopdir:
+ case Qunitdir:
+ case Qdevlinkdir:
+ return devdirread(c, db, n, 0, 0, aoegen);
+ case Qtopctl:
+ return topctlread(c, db, n, off);
+ case Qtoplog:
+ return eventlogread(db, n);
+ case Qctl:
+ case Qdata:
+ case Qconfig:
+ case Qident:
+ return unitread(c, db, n, off);
+ case Qdevlink:
+ return devlinkread(c, db, n, off);
+ }
+}
+
+static long
+configwrite(Aoedev *d, void *db, long len)
+{
+ char *s;
+ Aoeqc *ch;
+ Frame *f;
+ Srb *srb;
+
+ if(!UP(d))
+ error(Enotup);
+ if(len > ETHERMAXTU - AOEQCSZ)
+ error(Etoobig);
+ srb = srballoc(len);
+ s = malloc(len);
+ memmove(s, db, len);
+ if(waserror()){
+ srbfree(srb);
+ free(s);
+ nexterror();
+ }
+ for (;;) {
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ nexterror();
+ }
+ f = freeframe(d);
+ if(f != nil)
+ break;
+ poperror();
+ qunlock(d);
+ if(waserror())
+ nexterror();
+ tsleep(&up->sleep, return0, 0, 100);
+ poperror();
+ }
+ f->nhdr = AOEQCSZ;
+ memset(f->hdr, 0, f->nhdr);
+ ch = (Aoeqc*)f->hdr;
+ if(hset(d, f, ch, ACconfig) == -1)
+ return 0;
+ f->srb = srb;
+ f->dp = s;
+ ch->verccmd = AQCfset;
+ hnputs(ch->cslen, len);
+ d->nout++;
+ srb->nout++;
+ f->dl->npkt++;
+ f->dlen = len;
+ /*
+ * these refer to qlock & waserror in the above for loop.
+ * there's still the first waserror outstanding.
+ */
+ poperror();
+ qunlock(d);
+
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+ sleep(srb, srbready, srb);
+ if(srb->error)
+ error(srb->error);
+
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ nexterror();
+ }
+ memmove(d->config, s, len);
+ d->nconfig = len;
+ poperror();
+ qunlock(d);
+
+ poperror(); /* pop first waserror */
+
+ srbfree(srb);
+ memmove(db, s, len);
+ free(s);
+ return len;
+}
+
+static int getmtu(Chan*);
+
+static int
+devmaxdata(Aoedev *d)
+{
+ int i, m, mtu;
+ Devlink *l;
+ Netlink *n;
+
+ mtu = 100000;
+ for(i = 0; i < d->ndl; i++){
+ l = d->dl + i;
+ n = l->nl;
+ if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
+ continue;
+ m = getmtu(n->mtu);
+ if(m < mtu)
+ mtu = m;
+ }
+ if(mtu == 100000)
+ mtu = 0;
+ mtu -= AOEATASZ;
+ return mtu;
+}
+
+static int
+toggle(char *s, int init)
+{
+ if(s == nil)
+ return init ^ 1;
+ return strcmp(s, "on") == 0;
+}
+
+static void ataident(Aoedev*);
+
+static long
+unitctlwrite(Aoedev *d, void *db, long n)
+{
+ uint maxbcnt, m;
+ uvlong bsize;
+ enum {
+ Failio,
+ Ident,
+ Jumbo,
+ Maxbno,
+ Mtu,
+ Nofailf,
+ Setsize,
+ };
+ Cmdbuf *cb;
+ Cmdtab *ct;
+ static Cmdtab cmds[] = {
+ {Failio, "failio", 1 },
+ {Ident, "identify", 1 },
+ {Jumbo, "jumbo", 0 },
+ {Maxbno, "maxbno", 0 },
+ {Mtu, "mtu", 0 },
+ {Nofailf, "nofail", 0 },
+ {Setsize, "setsize", 0 },
+ };
+
+ cb = parsecmd(db, n);
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ free(cb);
+ nexterror();
+ }
+ ct = lookupcmd(cb, cmds, nelem(cmds));
+ switch(ct->index){
+ case Failio:
+ downdev(d, "i/o failure");
+ break;
+ case Ident:
+ ataident(d);
+ break;
+ case Jumbo:
+ m = 0;
+ if(d->flag & Djumbo)
+ m = 1;
+ toggle(cb->f[1], m);
+ if(m)
+ d->flag |= Djumbo;
+ else
+ d->flag &= ~Djumbo;
+ break;
+ case Maxbno:
+ case Mtu:
+ maxbcnt = devmaxdata(d);
+ if(cb->nf > 2)
+ error(Ecmdargs);
+ if(cb->nf == 2){
+ m = strtoul(cb->f[1], 0, 0);
+ if(ct->index == Maxbno)
+ m *= Aoesectsz;
+ else{
+ m -= AOEATASZ;
+ m &= ~(Aoesectsz-1);
+ }
+ if(m == 0 || m > maxbcnt)
+ cmderror(cb, "mtu out of legal range");
+ maxbcnt = m;
+ }
+ d->maxbcnt = maxbcnt;
+ break;
+ case Nofailf:
+ if (toggle(cb->f[1], (d->flag & Dnofail) != 0))
+ d->flag |= Dnofail;
+ else
+ d->flag &= ~Dnofail;
+ break;
+ case Setsize:
+ bsize = d->realbsize;
+ if(cb->nf > 2)
+ error(Ecmdargs);
+ if(cb->nf == 2){
+ bsize = strtoull(cb->f[1], 0, 0);
+ if(bsize % Aoesectsz)
+ cmderror(cb, "disk size must be sector aligned");
+ }
+ d->bsize = bsize;
+ break;
+ default:
+ cmderror(cb, "unknown aoe control message");
+ }
+ poperror();
+ qunlock(d);
+ free(cb);
+ return n;
+}
+
+static long
+unitwrite(Chan *c, void *db, long n, vlong off)
+{
+ long rv;
+ char *buf;
+ Aoedev *d;
+
+ d = unit2dev(UNIT(c->qid));
+ switch(TYPE(c->qid)){
+ default:
+ error(Ebadarg);
+ case Qctl:
+ return unitctlwrite(d, db, n);
+ case Qident:
+ error(Eperm);
+ case Qdata:
+ return rw(d, Write, db, n, off);
+ case Qconfig:
+ if(off + n > sizeof d->config)
+ error(Etoobig);
+ buf = malloc(sizeof d->config);
+ if(waserror()){
+ free(buf);
+ nexterror();
+ }
+ memmove(buf, d->config, d->nconfig);
+ memmove(buf + off, db, n);
+ rv = configwrite(d, buf, n + off);
+ poperror();
+ free(buf);
+ return rv;
+ }
+}
+
+static Netlink*
+addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
+{
+ Netlink *nl, *e;
+
+ lock(&netlinks);
+ if(waserror()){
+ unlock(&netlinks);
+ nexterror();
+ }
+ nl = netlinks.nl;
+ e = nl + nelem(netlinks.nl);
+ for(; nl < e && nl->cc; nl++)
+ continue;
+ if (nl >= e)
+ error("out of netlink structures");
+ nl->cc = cc;
+ nl->dc = dc;
+ nl->mtu = mtu;
+ strncpy(nl->path, path, sizeof nl->path);
+ memmove(nl->ea, ea, sizeof nl->ea);
+ poperror();
+ nl->flag |= Dup;
+ unlock(&netlinks);
+ return nl;
+}
+
+static int
+newunit(void)
+{
+ int x;
+
+ lock(&units);
+ if(units.ref == Maxunits)
+ x = -1;
+ else
+ x = units.ref++;
+ unlock(&units);
+ return x;
+}
+
+static int
+dropunit(void)
+{
+ int x;
+
+ lock(&units);
+ x = --units.ref;
+ unlock(&units);
+ return x;
+}
+
+/*
+ * always allocate max frames. maxout may change.
+ */
+static Aoedev*
+newdev(long major, long minor, int n)
+{
+ Aoedev *d;
+ Frame *f, *e;
+
+ d = mallocz(sizeof *d, 1);
+ f = mallocz(sizeof *f * Maxframes, 1);
+ if (!d || !f) {
+ free(d);
+ free(f);
+ error("aoe device allocation failure");
+ }
+ d->nframes = n;
+ d->frames = f;
+ for (e = f + n; f < e; f++)
+ f->tag = Tfree;
+ d->maxout = n;
+ d->major = major;
+ d->minor = minor;
+ d->maxbcnt = Dbcnt;
+ d->flag = Djumbo;
+ d->unit = newunit(); /* bzzt. inaccurate if units removed */
+ if(d->unit == -1){
+ free(d);
+ free(d->frames);
+ error("too many units");
+ }
+ d->dl = d->dltab;
+ return d;
+}
+
+static Aoedev*
+mm2dev(int major, int minor)
+{
+ Aoedev *d;
+
+ rlock(&devs);
+ for(d = devs.d; d; d = d->next)
+ if(d->major == major && d->minor == minor){
+ runlock(&devs);
+ return d;
+ }
+ runlock(&devs);
+ eventlog("mm2dev: %d.%d not found\n", major, minor);
+ return nil;
+}
+
+/* Find the device in our list. If not known, add it */
+static Aoedev*
+getdev(long major, long minor, int n)
+{
+ Aoedev *d;
+
+ if(major == 0xffff || minor == 0xff)
+ return 0;
+ wlock(&devs);
+ if(waserror()){
+ wunlock(&devs);
+ nexterror();
+ }
+ for(d = devs.d; d; d = d->next)
+ if(d->major == major && d->minor == minor)
+ break;
+ if (d == nil) {
+ d = newdev(major, minor, n);
+ d->next = devs.d;
+ devs.d = d;
+ }
+ poperror();
+ wunlock(&devs);
+ return d;
+}
+
+static ushort
+gbit16(void *a)
+{
+ uchar *i;
+
+ i = a;
+ return i[1] << 8 | i[0];
+}
+
+static ulong
+gbit32(void *a)
+{
+ ulong j;
+ uchar *i;
+
+ i = a;
+ j = i[3] << 24;
+ j |= i[2] << 16;
+ j |= i[1] << 8;
+ j |= i[0];
+ return j;
+}
+
+static uvlong
+gbit64(void *a)
+{
+ uchar *i;
+
+ i = a;
+ return (uvlong)gbit32(i+4) << 32 | gbit32(a);
+}
+
+static void
+ataident(Aoedev *d)
+{
+ Aoeata *a;
+ Block *b;
+ Frame *f;
+
+ f = freeframe(d);
+ if(f == nil)
+ return;
+ f->nhdr = AOEATASZ;
+ memset(f->hdr, 0, f->nhdr);
+ a = (Aoeata*)f->hdr;
+ if(hset(d, f, a, ACata) == -1)
+ return;
+ a->cmdstat = Cid; /* ata 6, page 110 */
+ a->scnt = 1;
+ a->lba[3] = 0xa0;
+ d->nout++;
+ f->dl->npkt++;
+ f->bcnt = 512;
+ f->dlen = 0;
+ b = allocfb(f);
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
+}
+
+static int
+getmtu(Chan *m)
+{
+ int n, mtu;
+ char buf[36];
+
+ mtu = 1514;
+ if(m == nil || waserror())
+ return mtu;
+ n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
+ if(n > 12){
+ buf[n] = 0;
+ mtu = strtoul(buf + 12, 0, 0);
+ }
+ poperror();
+ return mtu;
+}
+
+static int
+newdlea(Devlink *l, uchar *ea)
+{
+ int i;
+ uchar *t;
+
+ for(i = 0; i < Nea; i++){
+ t = l->eatab[i];
+ if(i == l->nea){
+ memmove(t, ea, Eaddrlen);
+ return l->nea++;
+ }
+ if(memcmp(t, ea, Eaddrlen) == 0)
+ return i;
+ }
+ return -1;
+}
+
+static Devlink*
+newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
+{
+ int i;
+ Devlink *l;
+
+ for(i = 0; i < Ndevlink; i++){
+ l = d->dl + i;
+ if(i == d->ndl){
+ d->ndl++;
+ newdlea(l, c->src);
+ l->nl = n;
+ l->flag |= Dup;
+ l->mintimer = Rtmin;
+ l->rttavg = Rtmax;
+ return l;
+ }
+ if(l->nl == n) {
+ newdlea(l, c->src);
+ l->flag |= Dup;
+ return l;
+ }
+ }
+ eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
+ return 0;
+}
+
+static void
+errrsp(Block *b, char *s)
+{
+ int n;
+ Aoedev *d;
+ Aoehdr *h;
+ Frame *f;
+
+ h = (Aoehdr*)b->rp;
+ n = nhgetl(h->tag);
+ if(n == Tmgmt || n == Tfree)
+ return;
+ d = mm2dev(nhgets(h->major), h->minor);
+ if(d == 0)
+ return;
+ if(f = getframe(d, n))
+ frameerror(d, f, s);
+}
+
+static void
+qcfgrsp(Block *b, Netlink *nl)
+{
+ int major, cmd, cslen, blen;
+ unsigned n;
+ Aoedev *d;
+ Aoeqc *ch;
+ Devlink *l;
+ Frame *f;
+
+ ch = (Aoeqc*)b->rp;
+ major = nhgets(ch->major);
+ n = nhgetl(ch->tag);
+ if(n != Tmgmt){
+ d = mm2dev(major, ch->minor);
+ if(d == nil)
+ return;
+ qlock(d);
+ f = getframe(d, n);
+ if(f == nil){
+ qunlock(d);
+ eventlog("%æ: unknown response tag %ux\n", d, n);
+ return;
+ }
+ cslen = nhgets(ch->cslen);
+ blen = BLEN(b) - AOEQCSZ;
+ if(cslen < blen && BLEN(b) > 60)
+ eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
+ d, n, cslen, blen);
+ if(cslen > blen){
+ eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
+ d, n, cslen, blen);
+ cslen = blen;
+ }
+ memmove(f->dp, ch + 1, cslen);
+ f->srb->nout--;
+ wakeup(f->srb);
+ d->nout--;
+ f->srb = nil;
+ f->tag = Tfree;
+ qunlock(d);
+ return;
+ }
+
+ cmd = ch->verccmd & 0xf;
+ if(cmd != 0){
+ eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
+ return;
+ }
+ n = nhgets(ch->bufcnt);
+ if(n > Maxframes)
+ n = Maxframes;
+
+ if(waserror()){
+ eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
+ return;
+ }
+ d = getdev(major, ch->minor, n);
+ poperror();
+ if(d == 0)
+ return;
+
+ qlock(d);
+ *up->errstr = 0;
+ if(waserror()){
+ qunlock(d);
+ eventlog("%æ: %s\n", d, up->errstr);
+ nexterror();
+ }
+
+ l = newdevlink(d, nl, ch); /* add this interface. */
+
+ d->fwver = nhgets(ch->fwver);
+ n = nhgets(ch->cslen);
+ if(n > sizeof d->config)
+ n = sizeof d->config;
+ d->nconfig = n;
+ memmove(d->config, ch + 1, n);
+ if(l != 0 && d->flag & Djumbo){
+ n = getmtu(nl->mtu) - AOEATASZ;
+ n /= Aoesectsz;
+ if(n > ch->scnt)
+ n = ch->scnt;
+ n = n? n * Aoesectsz: Dbcnt;
+ if(n != d->maxbcnt){
+ eventlog("%æ: setting %d byte data frames on %s:%E\n",
+ d, n, nl->path, nl->ea);
+ d->maxbcnt = n;
+ }
+ }
+ if(d->nopen == 0)
+ ataident(d);
+ poperror();
+ qunlock(d);
+}
+
+void
+aoeidmove(char *p, ushort *u, unsigned n)
+{
+ int i;
+ char *op, *e, *s;
+
+ op = p;
+ /*
+ * the ushort `*u' is sometimes not aligned on a short boundary,
+ * so dereferencing u[i] causes an alignment exception on
+ * some machines.
+ */
+ s = (char *)u;
+ for(i = 0; i < n; i += 2){
+ *p++ = s[i + 1];
+ *p++ = s[i];
+ }
+ *p = 0;
+ while(p > op && *--p == ' ')
+ *p = 0;
+ e = p;
+ p = op;
+ while(*p == ' ')
+ p++;
+ memmove(op, p, n - (e - p));
+}
+
+static vlong
+aoeidentify(Aoedev *d, ushort *id)
+{
+ int i;
+ vlong s;
+
+ d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
+
+ i = gbit16(id+83) | gbit16(id+86);
+ if(i & (1<<10)){
+ d->flag |= Dllba;
+ s = gbit64(id+100);
+ }else
+ s = gbit32(id+60);
+
+ i = gbit16(id+83);
+ if((i>>14) == 1) {
+ if(i & (1<<3))
+ d->flag |= Dpower;
+ i = gbit16(id+82);
+ if(i & 1)
+ d->flag |= Dsmart;
+ if(i & (1<<14))
+ d->flag |= Dnop;
+ }
+// eventlog("%æ up\n", d);
+ d->flag |= Dup;
+ memmove(d->ident, id, sizeof d->ident);
+ return s;
+}
+
+static void
+newvers(Aoedev *d)
+{
+ lock(&drivevers);
+ d->vers = drivevers.ref++;
+ unlock(&drivevers);
+}
+
+static int
+identify(Aoedev *d, ushort *id)
+{
+ vlong osectors, s;
+ uchar oserial[21];
+
+ s = aoeidentify(d, id);
+ if(s == -1)
+ return -1;
+ osectors = d->realbsize;
+ memmove(oserial, d->serial, sizeof d->serial);
+
+ aoeidmove(d->serial, id+10, 20);
+ aoeidmove(d->firmware, id+23, 8);
+ aoeidmove(d->model, id+27, 40);
+
+ s *= Aoesectsz;
+ if((osectors == 0 || osectors != s) &&
+ memcmp(oserial, d->serial, sizeof oserial) != 0){
+ d->bsize = s;
+ d->realbsize = s;
+// d->mediachange = 1;
+ newvers(d);
+ }
+ return 0;
+}
+
+static void
+atarsp(Block *b)
+{
+ unsigned n;
+ short major;
+ Aoeata *ahin, *ahout;
+ Aoedev *d;
+ Frame *f;
+ Srb *srb;
+
+ ahin = (Aoeata*)b->rp;
+ major = nhgets(ahin->major);
+ d = mm2dev(major, ahin->minor);
+ if(d == nil)
+ return;
+ qlock(d);
+ if(waserror()){
+ qunlock(d);
+ nexterror();
+ }
+ n = nhgetl(ahin->tag);
+ f = getframe(d, n);
+ if(f == nil){
+ dprint("%æ: unexpected response; tag %ux\n", d, n);
+ goto bail;
+ }
+ rtupdate(f->dl, tsince(f->tag));
+ ahout = (Aoeata*)f->hdr;
+ srb = f->srb;
+
+ if(ahin->cmdstat & 0xa9){
+ eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
+ d, ahout->cmdstat, ahin->cmdstat);
+ if(srb)
+ srb->error = Eio;
+ } else {
+ n = ahout->scnt * Aoesectsz;
+ switch(ahout->cmdstat){
+ case Crd:
+ case Crdext:
+ if(BLEN(b) - AOEATASZ < n){
+ eventlog("%æ: runt read blen %ld expect %d\n",
+ d, BLEN(b), n);
+ goto bail;
+ }
+ memmove(f->dp, (uchar *)ahin + AOEATASZ, n);
+ case Cwr:
+ case Cwrext:
+ if(n > Dbcnt)
+ f->nl->lostjumbo = 0;
+ if(f->bcnt -= n){
+ f->lba += n / Aoesectsz;
+ f->dp = (uchar*)f->dp + n;
+ resend(d, f);
+ goto bail;
+ }
+ break;
+ case Cid:
+ if(BLEN(b) - AOEATASZ < 512){
+ eventlog("%æ: runt identify blen %ld expect %d\n",
+ d, BLEN(b), n);
+ goto bail;
+ }
+ identify(d, (ushort*)((uchar *)ahin + AOEATASZ));
+ break;
+ default:
+ eventlog("%æ: unknown ata command %.2ux \n",
+ d, ahout->cmdstat);
+ }
+ }
+
+ if(srb && --srb->nout == 0 && srb->len == 0)
+ wakeup(srb);
+ f->srb = nil;
+ f->tag = Tfree;
+ d->nout--;
+
+ work(d);
+bail:
+ poperror();
+ qunlock(d);
+}
+
+static void
+netrdaoeproc(void *v)
+{
+ int idx;
+ char name[Maxpath+1], *s;
+ Aoehdr *h;
+ Block *b;
+ Netlink *nl;
+
+ nl = (Netlink*)v;
+ idx = nl - netlinks.nl;
+ netlinks.reader[idx] = 1;
+ kstrcpy(name, nl->path, Maxpath);
+
+ if(waserror()){
+ eventlog("netrdaoe exiting: %s\n", up->errstr);
+ netlinks.reader[idx] = 0;
+ wakeup(netlinks.rendez + idx);
+ pexit(up->errstr, 1);
+ }
+ if(autodiscover)
+ discover(0xffff, 0xff);
+ for (;;) {
+ if(!(nl->flag & Dup)) {
+ uprint("%s: netlink is down", name);
+ error(up->genbuf);
+ }
+ if (nl->dc == nil)
+ panic("netrdaoe: nl->dc == nil");
+ b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
+ if(b == nil) {
+ uprint("%s: nil read from network", name);
+ error(up->genbuf);
+ }
+ h = (Aoehdr*)b->rp;
+ if(h->verflag & AFrsp)
+ if(s = aoeerror(h)){
+ eventlog("%s: %s\n", nl->path, up->errstr);
+ errrsp(b, s);
+ }else
+ switch(h->cmd){
+ case ACata:
+ atarsp(b);
+ break;
+ case ACconfig:
+ qcfgrsp(b, nl);
+ break;
+ default:
+ if((h->cmd & 0xf0) == 0){
+ eventlog("%s: unknown cmd %d\n",
+ nl->path, h->cmd);
+ errrsp(b, "unknown command");
+ }
+ break;
+ }
+ freeb(b);
+ }
+}
+
+static void
+getaddr(char *path, uchar *ea)
+{
+ int n;
+ char buf[2*Eaddrlen+1];
+ Chan *c;
+
+ uprint("%s/addr", path);
+ c = namec(up->genbuf, Aopen, OREAD, 0);
+ if(waserror()) {
+ cclose(c);
+ nexterror();
+ }
+ if (c == nil)
+ panic("æ: getaddr: c == nil");
+ n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
+ poperror();
+ cclose(c);
+ buf[n] = 0;
+ if(parseether(ea, buf) < 0)
+ error("parseether failure");
+}
+
+static void
+netbind(char *path)
+{
+ char addr[Maxpath];
+ uchar ea[2*Eaddrlen+1];
+ Chan *dc, *cc, *mtu;
+ Netlink *nl;
+
+ snprint(addr, sizeof addr, "%s!%#x", path, Aoetype);
+ dc = chandial(addr, nil, nil, &cc);
+ snprint(addr, sizeof addr, "%s/mtu", path);
+ if(waserror())
+ mtu = nil;
+ else {
+ mtu = namec(addr, Aopen, OREAD, 0);
+ poperror();
+ }
+
+ if(waserror()){
+ cclose(dc);
+ cclose(cc);
+ if(mtu)
+ cclose(mtu);
+ nexterror();
+ }
+ if(dc == nil || cc == nil)
+ error(Enonexist);
+ getaddr(path, ea);
+ nl = addnet(path, cc, dc, mtu, ea);
+ snprint(addr, sizeof addr, "netrdaoe@%s", path);
+ kproc(addr, netrdaoeproc, nl);
+ poperror();
+}
+
+static int
+unbound(void *v)
+{
+ return *(int*)v != 0;
+}
+
+static void
+netunbind(char *path)
+{
+ int i, idx;
+ Aoedev *d, *p, *next;
+ Chan *dc, *cc;
+ Devlink *l;
+ Frame *f;
+ Netlink *n, *e;
+
+ n = netlinks.nl;
+ e = n + nelem(netlinks.nl);
+
+ lock(&netlinks);
+ for(; n < e; n++)
+ if(n->dc && strcmp(n->path, path) == 0)
+ break;
+ unlock(&netlinks);
+ if (n >= e)
+ error("device not bound");
+
+ /*
+ * hunt down devices using this interface; disable
+ * this also terminates the reader.
+ */
+ idx = n - netlinks.nl;
+ wlock(&devs);
+ for(d = devs.d; d; d = d->next){
+ qlock(d);
+ for(i = 0; i < d->ndl; i++){
+ l = d->dl + i;
+ if(l->nl == n)
+ l->flag &= ~Dup;
+ }
+ qunlock(d);
+ }
+ n->flag &= ~Dup;
+ wunlock(&devs);
+
+ /* confirm reader is down. */
+ while(waserror())
+ ;
+ sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
+ poperror();
+
+ /* reschedule packets. */
+ wlock(&devs);
+ for(d = devs.d; d; d = d->next){
+ qlock(d);
+ for(i = 0; i < d->nframes; i++){
+ f = d->frames + i;
+ if(f->tag != Tfree && f->nl == n)
+ resend(d, f);
+ }
+ qunlock(d);
+ }
+ wunlock(&devs);
+
+ /* squeeze devlink pool. (we assert nobody is using them now) */
+ wlock(&devs);
+ for(d = devs.d; d; d = d->next){
+ qlock(d);
+ for(i = 0; i < d->ndl; i++){
+ l = d->dl + i;
+ if(l->nl == n)
+ memmove(l, l + 1, sizeof *l * (--d->ndl - i));
+ }
+ qunlock(d);
+ }
+ wunlock(&devs);
+
+ /* close device link. */
+ lock(&netlinks);
+ dc = n->dc;
+ cc = n->cc;
+ if(n->mtu)
+ cclose(n->mtu);
+ memset(n, 0, sizeof *n);
+ unlock(&netlinks);
+
+ cclose(dc);
+ cclose(cc);
+
+ /* squeeze orphan devices */
+ wlock(&devs);
+ for(p = d = devs.d; d; d = next){
+ next = d->next;
+ if(d->ndl > 0) {
+ p = d;
+ continue;
+ }
+ qlock(d);
+ downdev(d, "orphan");
+ qunlock(d);
+ if(p != devs.d)
+ p->next = next;
+ else{
+ devs.d = next;
+ p = devs.d;
+ }
+ free(d->frames);
+ free(d);
+ dropunit();
+ }
+ wunlock(&devs);
+}
+
+static void
+removeaoedev(Aoedev *d)
+{
+ int i;
+ Aoedev *p;
+
+ wlock(&devs);
+ p = 0;
+ if(d != devs.d)
+ for(p = devs.d; p; p = p->next)
+ if(p->next == d)
+ break;
+ qlock(d);
+ d->flag &= ~Dup;
+ newvers(d);
+ d->ndl = 0;
+ qunlock(d);
+ for(i = 0; i < d->nframes; i++)
+ frameerror(d, d->frames+i, Enotup);
+
+ if(p)
+ p->next = d->next;
+ else
+ devs.d = d->next;
+ free(d->frames);
+ free(d);
+ dropunit();
+ wunlock(&devs);
+}
+
+static void
+removedev(char *name)
+{
+ Aoedev *d, *p;
+
+ wlock(&devs);
+ for(p = d = devs.d; d; p = d, d = d->next)
+ if(strcmp(name, unitname(d)) == 0) {
+ wunlock(&devs);
+ removeaoedev(p);
+ return;
+ }
+ wunlock(&devs);
+ error("device not bound");
+}
+
+static void
+discoverstr(char *f)
+{
+ ushort shelf, slot;
+ ulong sh;
+ char *s;
+
+ if(f == 0){
+ discover(0xffff, 0xff);
+ return;
+ }
+
+ shelf = sh = strtol(f, &s, 0);
+ if(s == f || sh > 0xffff)
+ error("bad shelf");
+ f = s;
+ if(*f++ == '.'){
+ slot = strtol(f, &s, 0);
+ if(s == f || slot > 0xff)
+ error("bad shelf");
+ }else
+ slot = 0xff;
+ discover(shelf, slot);
+}
+
+
+static void
+aoeremove(Chan *c)
+{
+ switch(TYPE(c->qid)){
+ default:
+ error(Eperm);
+ case Qunitdir:
+ removeaoedev(unit2dev(UNIT(c->qid)));
+ break;
+ }
+}
+
+static long
+topctlwrite(void *db, long n)
+{
+ enum {
+ Autodiscover,
+ Bind,
+ Debug,
+ Discover,
+ Rediscover,
+ Remove,
+ Unbind,
+ };
+ char *f;
+ Cmdbuf *cb;
+ Cmdtab *ct;
+ static Cmdtab cmds[] = {
+ { Autodiscover, "autodiscover", 0 },
+ { Bind, "bind", 2 },
+ { Debug, "debug", 0 },
+ { Discover, "discover", 0 },
+ { Rediscover, "rediscover", 0 },
+ { Remove, "remove", 2 },
+ { Unbind, "unbind", 2 },
+ };
+
+ cb = parsecmd(db, n);
+ if(waserror()){
+ free(cb);
+ nexterror();
+ }
+ ct = lookupcmd(cb, cmds, nelem(cmds));
+ f = cb->f[1];
+ switch(ct->index){
+ case Autodiscover:
+ autodiscover = toggle(f, autodiscover);
+ break;
+ case Bind:
+ netbind(f);
+ break;
+ case Debug:
+ debug = toggle(f, debug);
+ break;
+ case Discover:
+ discoverstr(f);
+ break;
+ case Rediscover:
+ rediscover = toggle(f, rediscover);
+ break;
+ case Remove:
+ removedev(f);
+ break;
+ case Unbind:
+ netunbind(f);
+ break;
+ default:
+ cmderror(cb, "unknown aoe control message");
+ }
+ poperror();
+ free(cb);
+ return n;
+}
+
+static long
+aoewrite(Chan *c, void *db, long n, vlong off)
+{
+ switch(TYPE(c->qid)){
+ default:
+ case Qzero:
+ case Qtopdir:
+ case Qunitdir:
+ case Qtoplog:
+ error(Eperm);
+ case Qtopctl:
+ return topctlwrite(db, n);
+ case Qctl:
+ case Qdata:
+ case Qconfig:
+ case Qident:
+ return unitwrite(c, db, n, off);
+ }
+}
+
+Dev aoedevtab = {
+ L'æ',
+ "aoe",
+
+ devreset,
+ devinit,
+ devshutdown,
+ aoeattach,
+ aoewalk,
+ aoestat,
+ aoeopen,
+ devcreate,
+ aoeclose,
+ aoeread,
+ devbread,
+ aoewrite,
+ devbwrite,
+ aoeremove,
+ devwstat,
+ devpower,
+ devconfig,
+};