summaryrefslogtreecommitdiff
path: root/sys/src/9/port/devbridge.c
diff options
context:
space:
mode:
author Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
committer Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
commit e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/9/port/devbridge.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/9/port/devbridge.c')
-rwxr-xr-x sys/src/9/port/devbridge.c 1195
1 files changed, 1195 insertions, 0 deletions
diff --git a/sys/src/9/port/devbridge.c b/sys/src/9/port/devbridge.c
new file mode 100755
index 000000000..be416a249
--- /dev/null
+++ b/sys/src/9/port/devbridge.c
@@ -0,0 +1,1195 @@
+/*
+ * IPv4 Ethernet bridge
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../ip/ip.h"
+#include "../port/netif.h"
+#include "../port/error.h"
+
+typedef struct Bridge Bridge;
+typedef struct Port Port;
+typedef struct Centry Centry;
+typedef struct Iphdr Iphdr;
+typedef struct Tcphdr Tcphdr;
+
+/*
+ * Qid path types (kept in the low byte of the qid path, see TYPE),
+ * followed by the sizing and tuning constants of the bridge.
+ */
+enum
+{
+ Qtopdir= 1, /* top level directory */
+
+ Qbridgedir, /* bridge* directory */
+ Qbctl, /* bridge "ctl" file */
+ Qstats, /* bridge "stats" file */
+ Qcache, /* bridge "cache" file */
+ Qlog, /* bridge "log" file */
+
+ Qportdir, /* per-port directory, named by the port index */
+ Qpctl, /* port "ctl" file */
+ Qlocal, /* port "local" file */
+ Qstatus, /* port "status" file */
+
+ MaxQ, /* one past the last qid type; sizes dirtab[] */
+
+ Maxbridge= 4, // number of entries in bridgetab[]
+ Maxport= 128, // power of 2: PORT() masks with Maxport-1
+ CacheHash= 257, // prime
+ CacheLook= 5, // how many cache entries to examine
+ CacheSize= (CacheHash+CacheLook-1), // lets a probe of CacheLook slots start at the last hash bucket
+ CacheTimeout= 5*60, // timeout for cache entry in seconds
+
+ TcpMssMax = 1300, // max desirable Tcp MSS value
+ TunnelMtu = 1400, // longer packets are candidates for fragmentation on tunnel ports
+};
+
+/* plain files found in a bridge directory: name, qid, length, permissions */
+static Dirtab bridgedirtab[]={
+ "ctl", {Qbctl}, 0, 0666,
+ "stats", {Qstats}, 0, 0444,
+ "cache", {Qcache}, 0, 0444,
+ "log", {Qlog}, 0, 0666,
+};
+
+/*
+ * plain files found in every port directory; the qids here carry only
+ * the type, bridgegen adds the port number when it generates an entry
+ */
+static Dirtab portdirtab[]={
+ "ctl", {Qpctl}, 0, 0666,
+ "local", {Qlocal}, 0, 0444,
+ "status", {Qstatus}, 0, 0444,
+};
+
+/* log categories passed to log() by the cache and forwarding code */
+enum {
+ Logcache= (1<<0),
+ Logmcast= (1<<1),
+};
+
+// types of interfaces
+enum
+{
+ Tether, // bound with "bind ether": one channel used for both directions
+ Ttun, // bound with "bind tunnel": separate read and write channels
+};
+
+/* names for the log categories; handed to logctl() on writes to the log file */
+static Logflag logflags[] =
+{
+ { "cache", Logcache, },
+ { "multicast", Logmcast, },
+ { nil, 0, },
+};
+
+/* qid type -> Dirtab entry for plain files; filled in by bridgeinit */
+static Dirtab *dirtab[MaxQ];
+
+/*
+ * Qid path layout: low 8 bits hold the Q* type, the bits above hold
+ * the port number (masked to Maxport-1 when extracted).
+ */
+#define TYPE(x) (((ulong)(x).path) & 0xff)
+#define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1))
+#define QID(x, y) (((x)<<8) | (y))
+
+/* one entry of the address cache: which port a MAC address was last seen on */
+struct Centry
+{
+ uchar d[Eaddrlen]; // ethernet address this entry describes
+ int port; // index into Bridge.port[]
+ long expire; // entry expires this many seconds after boot time; 0 marks an unused slot
+ long src; // times the address was seen as a source (cacheupdate)
+ long dst; // times the address was looked up as a destination (cachelookup)
+};
+
+/* state of one bridge; the embedded QLock guards the ports and the cache */
+struct Bridge
+{
+ QLock;
+ int nport; // one more than the highest port index ever bound
+ Port *port[Maxport]; // nil for unbound slots
+ Centry cache[CacheSize];
+ ulong hit; // unicast packets forwarded via a cache entry
+ ulong miss; // unicast packets flooded because the lookup failed
+ ulong copy; // blocks copied while flooding to several ports
+ long delay0; // constant microsecond delay per packet
+ long delayn; // microsecond delay per byte
+ int tcpmss; // modify tcpmss value
+
+ Log;
+};
+
+/* one interface attached to a bridge */
+struct Port
+{
+ int id; // index of this port in Bridge.port[]
+ Bridge *bridge;
+ int ref; // dropped by portfree; the port is released when it reaches 0
+ int closed; // set by portunbind to make the reader process quit
+
+ Chan *data[2]; // channel to data: [0] is read by etherread, [1] is written by etherwrite
+
+ Proc *readp; // read proc
+
+ // the following uniquely identifies the port
+ int type;
+ char name[KNAMELEN];
+
+ // owner hash - avoids bind/unbind races
+ ulong ownhash;
+
+ // various stats
+ int in; // number of packets read
+ int inmulti; // multicast or broadcast
+ int inunknown; // unknown address
+ int out; // number of packets written
+ int outmulti; // multicast or broadcast
+ int outunknown; // unknown address
+ int outfrag; // fragmented the packet
+ int nentry; // number of cache entries for this port
+};
+
+/* IP and TCP constants used by tcpmsshack and the fragmentation code */
+enum {
+ IP_TCPPROTO = 6,
+ EOLOPT = 0, /* TCP option kind: end of option list */
+ NOOPOPT = 1, /* TCP option kind: no-operation (single byte) */
+ MSSOPT = 2, /* TCP option kind: maximum segment size */
+ MSS_LENGTH = 4, /* length in bytes of the maximum segment size option */
+ SYN = 0x02, /* Pkt. is synchronise */
+ IPHDR = 20, /* sizeof(Iphdr) */
+};
+
+/* IPv4 header without options; multi-byte fields are accessed with nhgets/hnputs */
+struct Iphdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* ip->identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+};
+
+/* fixed part of a TCP header; options, if any, follow it */
+struct Tcphdr
+{
+ uchar sport[2];
+ uchar dport[2];
+ uchar seq[4];
+ uchar ack[4];
+ uchar flag[2]; /* flag[0] high nibble: header length in 32-bit words; flag[1]: control bits (SYN etc.) */
+ uchar win[2];
+ uchar cksum[2];
+ uchar urg[2];
+};
+
+/* all bridges; selected by the attach specifier, which is kept in Chan.dev */
+static Bridge bridgetab[Maxbridge];
+
+/*
+ * open mode -> rwx-style permission bits (4 = read, 2 = write).
+ * Only OREAD, OWRITE and ORDWR have entries.
+ */
+static int m2p[] = {
+ [OREAD] 4,
+ [OWRITE] 2,
+ [ORDWR] 6
+};
+
+static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
+static void portbind(Bridge *b, int argc, char *argv[]);
+static void portunbind(Bridge *b, int argc, char *argv[]);
+static void etherread(void *a);
+static char *cachedump(Bridge *b);
+static void portfree(Port *port);
+static void cacheflushport(Bridge *b, int port);
+static void etherwrite(Port *port, Block *bp);
+
+/*
+ * Device init hook: fill dirtab[] so that the qid type of any plain
+ * file maps directly to its Dirtab entry.
+ */
+static void
+bridgeinit(void)
+{
+	Dirtab *e;
+
+	// bridge-level files first, then the per-port files
+	for(e = bridgedirtab; e < bridgedirtab + nelem(bridgedirtab); e++)
+		dirtab[TYPE(e->qid)] = e;
+	for(e = portdirtab; e < portdirtab + nelem(portdirtab); e++)
+		dirtab[TYPE(e->qid)] = e;
+}
+
+/*
+ * Attach to bridge number spec (decimal, 0 .. Maxbridge-1).
+ * The returned channel names the top level directory and remembers
+ * the bridge number in its dev field.
+ */
+static Chan*
+bridgeattach(char* spec)
+{
+	int which;
+	Chan *ch;
+
+	which = atoi(spec);
+	if(!(which >= 0 && which < Maxbridge))
+		error("bad specification");
+
+	ch = devattach('B', spec);
+	ch->dev = which;
+	mkqid(&ch->qid, QID(0, Qtopdir), 0, QTDIR);
+	return ch;
+}
+
+/*
+ * Walk: no static table is supplied, every name is resolved through
+ * bridgegen.
+ */
+static Walkqid*
+bridgewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	Walkqid *wq;
+
+	wq = devwalk(c, nc, name, nname, nil, 0, bridgegen);
+	return wq;
+}
+
+/*
+ * Stat: like walk, directory entries come from bridgegen rather
+ * than from a static table.
+ */
+static int
+bridgestat(Chan* c, uchar* db, int n)
+{
+	int r;
+
+	r = devstat(c, db, n, nil, 0, bridgegen);
+	return r;
+}
+
+/*
+ * Open a file of the bridge device.
+ *
+ * Opening the log file registers a log reader (logopen); opening the
+ * cache file takes a text snapshot of the address cache and keeps it
+ * in c->aux until bridgeclose frees it.  All other files need no
+ * per-open state.
+ *
+ * The open mode is reduced to its low two bits before being handed to
+ * openmode().  No table is indexed with that value: it can be 3, for
+ * which the three-entry mode-to-permission table has no slot, and the
+ * permission it yielded was never used.
+ */
+static Chan*
+bridgeopen(Chan* c, int omode)
+{
+	Bridge *b;
+
+	omode &= 3;
+	b = bridgetab + c->dev;
+
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qlog:
+		logopen(b);
+		break;
+	case Qcache:
+		c->aux = cachedump(b);
+		break;
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	return c;
+}
+
+/*
+ * Close: undo what bridgeopen set up, but only for channels that were
+ * actually opened (COPEN).
+ */
+static void
+bridgeclose(Chan* c)
+{
+	Bridge *b;
+
+	if(!(c->flag & COPEN))
+		return;
+
+	b = bridgetab + c->dev;
+	switch(TYPE(c->qid)) {
+	case Qlog:
+		logclose(b);
+		break;
+	case Qcache:
+		// snapshot allocated by cachedump at open time
+		free(c->aux);
+		break;
+	}
+}
+
+/*
+ * Read from a file or directory of the bridge device.
+ *
+ * Directories are generated by bridgegen.  "log" reads the log,
+ * "cache" returns the snapshot taken at open time, "ctl" and "stats"
+ * format the bridge settings and counters, and a port's "status" file
+ * formats that port's counters while holding the bridge lock.
+ * Any other file raises Eperm.
+ */
+static long
+bridgeread(Chan *c, void *a, long n, vlong off)
+{
+	Bridge *b;
+	Port *pt;
+	char buf[256], *fmt;
+	int len, inok, outok;
+
+	USED(off);
+	b = bridgetab + c->dev;
+	switch(TYPE(c->qid)) {
+	default:
+		error(Eperm);
+	case Qtopdir:
+	case Qbridgedir:
+	case Qportdir:
+		return devdirread(c, a, n, 0, 0, bridgegen);
+	case Qlog:
+		return logread(b, a, off, n);
+	case Qcache:
+		return readstr(off, a, n, c->aux);
+	case Qbctl:
+		snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
+			b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
+		return readstr(off, a, n, buf);
+	case Qstats:
+		snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
+			b->hit, b->miss, b->copy);
+		return readstr(off, a, n, buf);
+	case Qstatus:
+		break;
+	}
+
+	/* port status: the port table may only be examined under the lock */
+	qlock(b);
+	pt = b->port[PORT(c->qid)];
+	if(pt == 0)
+		strcpy(buf, "unbound\n");
+	else {
+		fmt = nil;
+		switch(pt->type) {
+		case Tether:
+			fmt = "ether %s: ";
+			break;
+		case Ttun:
+			fmt = "tunnel %s: ";
+			break;
+		default:
+			panic("bridgeread: unknown port type: %d",
+				pt->type);
+		}
+		len = snprint(buf, sizeof(buf), fmt, pt->name);
+
+		// "good" packets are those that were neither multicast nor unknown
+		inok = pt->in - pt->inmulti - pt->inunknown;
+		outok = pt->out - pt->outmulti - pt->outunknown;
+		len += snprint(buf+len, sizeof(buf)-len,
+			"in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
+			pt->in, inok, pt->inmulti, pt->inunknown,
+			pt->out, outok, pt->outmulti,
+			pt->outunknown, pt->outfrag);
+		USED(len);
+	}
+	n = readstr(off, a, n, buf);
+	qunlock(b);
+	return n;
+}
+
+/*
+ * Set or clear a named bridge option; "tcpmss" is the only one known.
+ * Raises an error for any other name.
+ */
+static void
+bridgeoption(Bridge *b, char *option, int value)
+{
+	if(strcmp(option, "tcpmss") != 0)
+		error("unknown bridge option");
+	b->tcpmss = value;
+}
+
+
+/*
+ * Write to a file of the bridge device.
+ *
+ * "log" accepts log control requests (handed to logctl).
+ * "ctl" accepts, with the bridge locked:
+ *	bind ...		attach a port (portbind)
+ *	unbind ...		detach a port (portunbind)
+ *	cacheflush		forget all learned addresses
+ *	set option		turn a bridge option on
+ *	clear option		turn a bridge option off
+ *	delay delay0 delayn	artificial per-packet / per-byte delay
+ * Writes to any other file raise Eperm.  Returns n on success.
+ */
+static long
+bridgewrite(Chan *c, void *a, long n, vlong off)
+{
+	Bridge *b;
+	Cmdbuf *cmd;
+	char *verb, *err;
+
+	USED(off);
+	b = bridgetab + c->dev;
+	switch(TYPE(c->qid)) {
+	default:
+		error(Eperm);
+	case Qbctl:
+		break;
+	case Qlog:
+		cmd = parsecmd(a, n);
+		err = logctl(b, cmd->nf, cmd->f, logflags);
+		free(cmd);
+		if(err != nil)
+			error(err);
+		return n;
+	}
+
+	/* bridge ctl request */
+	cmd = parsecmd(a, n);
+	qlock(b);
+	if(waserror()) {
+		qunlock(b);
+		free(cmd);
+		nexterror();
+	}
+	if(cmd->nf == 0)
+		error("short write");
+	verb = cmd->f[0];
+	if(strcmp(verb, "bind") == 0)
+		portbind(b, cmd->nf-1, cmd->f+1);
+	else if(strcmp(verb, "unbind") == 0)
+		portunbind(b, cmd->nf-1, cmd->f+1);
+	else if(strcmp(verb, "cacheflush") == 0) {
+		log(b, Logcache, "cache flush\n");
+		memset(b->cache, 0, sizeof(b->cache));
+	} else if(strcmp(verb, "set") == 0) {
+		if(cmd->nf != 2)
+			error("usage: set option");
+		bridgeoption(b, cmd->f[1], 1);
+	} else if(strcmp(verb, "clear") == 0) {
+		if(cmd->nf != 2)
+			error("usage: clear option");
+		bridgeoption(b, cmd->f[1], 0);
+	} else if(strcmp(verb, "delay") == 0) {
+		if(cmd->nf != 3)
+			error("usage: delay delay0 delayn");
+		b->delay0 = strtol(cmd->f[1], nil, 10);
+		b->delayn = strtol(cmd->f[2], nil, 10);
+	} else
+		error("unknown control request");
+	poperror();
+	qunlock(b);
+	free(cmd);
+	return n;
+}
+
+/*
+ * Directory generator for the bridge device.
+ *
+ * Produces entry number s of the directory named by c, or the parent
+ * when s is DEVDOTDOT.  Returns 1 when dp was filled in and -1 when s
+ * is past the end.  For a plain file, entry 0 is the file itself.
+ *
+ * Layout:
+ *	top directory:		bridgeN
+ *	bridge directory:	the bridgedirtab files, then one numbered
+ *				directory per port slot below b->nport
+ *	port directory:		the portdirtab files
+ */
+static int
+bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
+{
+	Bridge *b;
+	Dirtab *ent;
+	Qid q;
+	int kind;
+
+	kind = TYPE(c->qid);
+
+	if(s == DEVDOTDOT){
+		if(kind == Qtopdir || kind == Qbridgedir){
+			snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
+			mkqid(&q, Qtopdir, 0, QTDIR);
+		}else if(kind == Qportdir){
+			snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
+			mkqid(&q, Qbridgedir, 0, QTDIR);
+		}else
+			panic("bridgewalk %llux", c->qid.path);
+		devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+		return 1;
+	}
+
+	switch(kind) {
+	case Qtopdir:
+		if(s != 0)
+			return -1;
+		snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
+		mkqid(&q, QID(0, Qbridgedir), 0, QTDIR);
+		devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+		return 1;
+
+	case Qbridgedir:
+		if(s >= nelem(bridgedirtab)) {
+			/* past the fixed files: numbered port directories */
+			s -= nelem(bridgedirtab);
+			b = bridgetab + c->dev;
+			if(s >= b->nport)
+				return -1;
+			mkqid(&q, QID(s, Qportdir), 0, QTDIR);
+			snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+		ent = &bridgedirtab[s];
+		devdir(c, ent->qid, ent->name, ent->length, eve, ent->perm, dp);
+		return 1;
+
+	case Qportdir:
+		if(s >= nelem(portdirtab))
+			return -1;
+		ent = &portdirtab[s];
+		// stamp the port number of the directory into the file's qid
+		mkqid(&q, QID(PORT(c->qid),TYPE(ent->qid)), 0, QTFILE);
+		devdir(c, q, ent->name, ent->length, eve, ent->perm, dp);
+		return 1;
+
+	default:
+		/* non-directory entries end up here */
+		if(c->qid.type & QTDIR)
+			panic("bridgegen: unexpected directory");
+		if(s != 0)
+			return -1;
+		ent = dirtab[TYPE(c->qid)];
+		if(ent == nil)
+			panic("bridgegen: unknown type: %lud", TYPE(c->qid));
+		devdir(c, c->qid, ent->name, ent->length, eve, ent->perm, dp);
+		return 1;
+	}
+}
+
+/*
+ * parse mac address; also in netif.c
+ *
+ * Converts alen pairs of hex digits from the string from into the
+ * bytes of to.  A ':' after a pair is skipped.  Returns 0 on success
+ * and -1 if the string ends before alen pairs were read.
+ */
+static int
+parseaddr(uchar *to, char *from, int alen)
+{
+	char pair[4];
+	int k;
+
+	pair[2] = 0;
+	for(k = 0; k < alen; k++){
+		if(from[0] == 0 || from[1] == 0)
+			return -1;
+		pair[0] = from[0];
+		pair[1] = from[1];
+		from += 2;
+		to[k] = strtoul(pair, 0, 16);
+		if(*from == ':')
+			from++;
+	}
+	return 0;
+}
+
+/*
+ * Handle "bind ether|tunnel name ownhash dev [dev2]".
+ * assumes b is locked
+ *
+ * Allocates a Port in the first free slot, opens its data channel(s)
+ * and starts an etherread process for it.
+ *	ether:  dev is an ethernet directory; a new conversation is
+ *		cloned, made promiscuous and put in bridge mode, and
+ *		its data file serves for both reading and writing.
+ *	tunnel: dev is opened for reading, dev2 for writing.
+ * Raises an error on bad usage, when a port of the same type and name
+ * is already bound, or when all Maxport slots are taken; anything
+ * acquired before a failure is released through portfree/cclose.
+ */
+static void
+portbind(Bridge *b, int argc, char *argv[])
+{
+	Port *port;
+	Chan *ctl;
+	int type = 0, i, n;
+	ulong ownhash;
+	char *dev, *dev2 = nil, *p;
+	char buf[100], name[KNAMELEN], path[8*KNAMELEN];
+	static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
+
+	memset(name, 0, KNAMELEN);
+	if(argc < 4)
+		error(usage);
+	if(strcmp(argv[0], "ether") == 0) {
+		if(argc != 4)
+			error(usage);
+		type = Tether;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+	} else if(strcmp(argv[0], "tunnel") == 0) {
+		if(argc != 5)
+			error(usage);
+		type = Ttun;
+		strncpy(name, argv[1], KNAMELEN);
+		name[KNAMELEN-1] = 0;
+		dev2 = argv[4];
+	} else
+		error(usage);
+	ownhash = atoi(argv[2]);
+	dev = argv[3];
+
+	// a (type, name) pair may be bound only once
+	for(i=0; i<b->nport; i++) {
+		port = b->port[i];
+		if(port != nil && port->type == type &&
+				memcmp(port->name, name, KNAMELEN) == 0)
+			error("port in use");
+	}
+	// take the first free slot
+	for(i=0; i<Maxport; i++)
+		if(b->port[i] == nil)
+			break;
+	if(i == Maxport)
+		error("no more ports");
+	port = smalloc(sizeof(Port));
+	port->ref = 1;
+	port->id = i;
+	port->ownhash = ownhash;
+
+	if(waserror()) {
+		portfree(port);
+		nexterror();
+	}
+	port->type = type;
+	memmove(port->name, name, KNAMELEN);
+	switch(port->type) {
+	default:
+		panic("portbind: unknown port type: %d", type);
+	case Tether:
+		snprint(path, sizeof(path), "%s/clone", dev);
+		ctl = namec(path, Aopen, ORDWR, 0);
+		if(waserror()) {
+			cclose(ctl);
+			nexterror();
+		}
+		// check addr?
+
+		// get directory name
+		// read at most sizeof(buf)-1 bytes so the terminator below stays inside buf
+		n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
+		buf[n] = 0;
+		for(p = buf; *p == ' '; p++)
+			;
+		snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
+
+		// setup connection to be promiscuous
+		snprint(buf, sizeof(buf), "connect -1");
+		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+		snprint(buf, sizeof(buf), "promiscuous");
+		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+		snprint(buf, sizeof(buf), "bridge");
+		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+
+		// open data port
+		port->data[0] = namec(path, Aopen, ORDWR, 0);
+		// dup it
+		incref(port->data[0]);
+		port->data[1] = port->data[0];
+
+		poperror();
+		cclose(ctl);
+
+		break;
+	case Ttun:
+		port->data[0] = namec(dev, Aopen, OREAD, 0);
+		port->data[1] = namec(dev2, Aopen, OWRITE, 0);
+		break;
+	}
+
+	poperror();
+
+	/* committed to binding port */
+	b->port[port->id] = port;
+	port->bridge = b;
+	if(b->nport <= port->id)
+		b->nport = port->id+1;
+
+	// assumes kproc always succeeds
+	// the reader takes the bridge lock before touching the port, so
+	// taking its reference after kproc is safe while we hold the lock
+	kproc("etherread", etherread, port);
+	port->ref++;
+}
+
+/*
+ * Handle "unbind ether|tunnel addr [ownhash]".
+ * assumes b is locked
+ *
+ * Finds the port with the given type and name, marks it closed,
+ * removes it from the bridge, drops its cache entries, posts an
+ * "unbind" note to its reader process (if any) and releases the
+ * bridge's reference.  When both the caller and the port carry a
+ * non-zero owner hash, the two must match.
+ */
+static void
+portunbind(Bridge *b, int argc, char *argv[])
+{
+	Port *pt = nil;
+	int kind = 0, slot;
+	ulong hash;
+	char name[KNAMELEN];
+	static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
+
+	memset(name, 0, KNAMELEN);
+	if(argc < 2 || argc > 3)
+		error(usage);
+
+	if(strcmp(argv[0], "ether") == 0)
+		kind = Tether;
+	else if(strcmp(argv[0], "tunnel") == 0)
+		kind = Ttun;
+	else
+		error(usage);
+	strncpy(name, argv[1], KNAMELEN);
+	name[KNAMELEN-1] = 0;
+
+	hash = 0;
+	if(argc == 3)
+		hash = atoi(argv[2]);
+
+	for(slot = 0; slot < b->nport; slot++) {
+		pt = b->port[slot];
+		if(pt == nil || pt->type != kind)
+			continue;
+		if(memcmp(pt->name, name, KNAMELEN) == 0)
+			break;
+	}
+	if(slot == b->nport)
+		error("port not found");
+	if(hash != 0 && pt->ownhash != 0 && hash != pt->ownhash)
+		error("bad owner hash");
+
+	pt->closed = 1;
+	b->port[slot] = nil;	// port is now unbound
+	cacheflushport(b, slot);
+
+	// try and stop reader
+	if(pt->readp)
+		postnote(pt->readp, 1, "unbind", 0);
+	portfree(pt);
+}
+
+/*
+ * Find the cache entry for destination address d.
+ * assumes b is locked
+ *
+ * Returns nil for multicast/broadcast addresses, for addresses not in
+ * the cache and for entries that have expired.  A live hit has its
+ * expiry pushed out by CacheTimeout seconds.
+ */
+static Centry *
+cachelookup(Bridge *b, uchar d[Eaddrlen])
+{
+	Centry *ce, *end;
+	uint hash;
+	long now;
+	int k;
+
+	// dont cache multicast or broadcast
+	if(d[0] & 1)
+		return 0;
+
+	hash = 0;
+	for(k = 0; k < Eaddrlen; k++)
+		hash = hash*7 + d[k];
+	hash %= CacheHash;
+
+	now = TK2SEC(m->ticks);
+	ce = b->cache + hash;
+	// probe CacheLook consecutive slots starting at the hash bucket
+	for(end = ce + CacheLook; ce < end; ce++) {
+		if(memcmp(d, ce->d, Eaddrlen) != 0)
+			continue;
+		ce->dst++;
+		if(now >= ce->expire) {
+			log(b, Logcache, "expired cache entry: %E %d\n",
+				d, ce->port);
+			return nil;
+		}
+		ce->expire = now + CacheTimeout;
+		return ce;
+	}
+	log(b, Logcache, "cache miss: %E\n", d);
+	return nil;
+}
+
+/*
+ * Record that source address d was seen on the given port.
+ * assumes b is locked
+ *
+ * An existing entry is refreshed (and moved to the new port if the
+ * address changed ports).  Otherwise the slot with the smallest
+ * expiry time among the CacheLook probed slots is overwritten.
+ * Multicast/broadcast source addresses are logged and ignored.
+ */
+static void
+cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
+{
+	Centry *ce, *victim;
+	uint hash;
+	long oldest;
+	int k;
+
+	// dont cache multicast or broadcast
+	if(d[0] & 1) {
+		log(b, Logcache, "bad source address: %E\n", d);
+		return;
+	}
+
+	hash = 0;
+	for(k = 0; k < Eaddrlen; k++)
+		hash = hash*7 + d[k];
+	hash %= CacheHash;
+
+	ce = b->cache + hash;
+	victim = ce;
+	oldest = ce->expire;
+
+	// look for oldest entry
+	for(k = 0; k < CacheLook; k++, ce++) {
+		if(memcmp(ce->d, d, Eaddrlen) != 0) {
+			if(ce->expire < oldest) {
+				oldest = ce->expire;
+				victim = ce;
+			}
+			continue;
+		}
+		/* already known: refresh it */
+		ce->expire = TK2SEC(m->ticks) + CacheTimeout;
+		if(ce->port != port) {
+			log(b, Logcache, "NIC changed port %d->%d: %E\n",
+				ce->port, port, d);
+			ce->port = port;
+		}
+		ce->src++;
+		return;
+	}
+
+	/* not known: replace the oldest slot (expire 0 means it was unused) */
+	if(victim->expire != 0)
+		log(b, Logcache, "bumping from cache: %E %d\n", victim->d, victim->port);
+	victim->expire = TK2SEC(m->ticks) + CacheTimeout;
+	memmove(victim->d, d, Eaddrlen);
+	victim->port = port;
+	victim->src = 1;
+	victim->dst = 0;
+	log(b, Logcache, "adding to cache: %E %d\n", victim->d, victim->port);
+}
+
+/*
+ * Clear every cache entry whose port field equals port.
+ * assumes b is locked
+ */
+static void
+cacheflushport(Bridge *b, int port)
+{
+	Centry *ce, *end;
+
+	end = b->cache + CacheSize;
+	for(ce = b->cache; ce < end; ce++)
+		if(ce->port == port)
+			memset(ce, 0, sizeof(Centry));
+}
+
+/*
+ * Format the address cache of b as text, one line per used entry:
+ *	address port src-count dst-count expiry v|e
+ * where the expiry is converted from seconds-since-boot to the time
+ * base of seconds(), and the last column is 'v' for a valid entry and
+ * 'e' for an expired one.
+ *
+ * Takes the bridge lock itself.  Returns a malloc'ed, NUL terminated
+ * string that the caller must free.  Raises Enomem if the buffer
+ * cannot be allocated; the lock is released on that path too.
+ */
+static char *
+cachedump(Bridge *b)
+{
+	int i, n;
+	long sec, off;
+	char *buf, *p, *ep;
+	Centry *ce;
+	char c;
+
+	qlock(b);
+	if(waserror()) {
+		qunlock(b);
+		nexterror();
+	}
+	sec = TK2SEC(m->ticks);
+	n = 0;
+	for(i=0; i<CacheSize; i++)
+		if(b->cache[i].expire != 0)
+			n++;
+
+	n *= 51;	// change if print format is changed
+	n += 10;	// some slop at the end
+	buf = malloc(n);
+	if(buf == nil)
+		error(Enomem);	// malloc may fail; don't write through nil below
+	p = buf;
+	ep = buf + n;
+	ce = b->cache;
+	off = seconds() - sec;
+	for(i=0; i<CacheSize; i++,ce++) {
+		if(ce->expire == 0)
+			continue;
+		c = (sec < ce->expire)?'v':'e';
+		p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
+			ce->port, ce->src, ce->dst, ce->expire+off, c);
+	}
+	*p = 0;
+	poperror();
+	qunlock(b);
+
+	return buf;
+}
+
+
+
+/*
+ * Flood a packet to every bound port except the one it arrived on.
+ * assumes b is locked
+ *
+ * Used for multicast/broadcast packets and for unicast packets whose
+ * destination is not in the cache.  bp is always disposed of here:
+ * it is handed to etherwrite for the last output port, or freed when
+ * there is no other port.  Earlier ports each get a copy.
+ * An error raised while writing to one port is swallowed so that the
+ * remaining ports are still served.
+ */
+static void
+ethermultiwrite(Bridge *b, Block *bp, Port *port)
+{
+ Port *oport;
+ Block *bp2;
+ Etherpkt *ep;
+ int i, mcast;
+
+ // errors raised outside the per-port handlers (e.g. by copyblock):
+ // release the packet if we still own it
+ if(waserror()) {
+ if(bp)
+ freeb(bp);
+ nexterror();
+ }
+
+ ep = (Etherpkt*)bp->rp;
+ mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
+
+ oport = nil;
+ for(i=0; i<b->nport; i++) {
+ if(i == port->id || b->port[i] == nil)
+ continue;
+ /*
+ * we need to forward multicast packets for ipv6,
+ * so always do it.
+ */
+ if(mcast)
+ b->port[i]->outmulti++;
+ else
+ b->port[i]->outunknown++;
+
+ // delay one so that the last write does not copy
+ if(oport != nil) {
+ b->copy++;
+ bp2 = copyblock(bp, blocklen(bp));
+ // a failed write to this port must not stop the flood
+ if(!waserror()) {
+ etherwrite(oport, bp2);
+ poperror();
+ }
+ }
+ oport = b->port[i];
+ }
+
+ // last write free block
+ if(oport) {
+ // bp is cleared first so the outer error handler cannot free it again
+ bp2 = bp; bp = nil; USED(bp);
+ if(!waserror()) {
+ etherwrite(oport, bp2);
+ poperror();
+ }
+ } else
+ freeb(bp);
+
+ poperror();
+}
+
+/*
+ * Clamp the TCP maximum segment size option of a packet in place.
+ *
+ * epkt/n describe an ethernet frame of n bytes.  If it is an IPv4 TCP
+ * SYN segment that carries an MSS option larger than TcpMssMax, the
+ * option is rewritten to TcpMssMax and the TCP checksum is adjusted
+ * incrementally.  Every other packet is left untouched.
+ *
+ * All length comparisons are done in signed arithmetic so that a
+ * truncated packet (negative remaining length) is rejected, and the
+ * option walker never reads a length byte that lies beyond the
+ * option area.
+ */
+static void
+tcpmsshack(Etherpkt *epkt, int n)
+{
+	int hl, optlen;
+	Iphdr *iphdr;
+	Tcphdr *tcphdr;
+	ulong mss, cksum;
+	uchar *optr;
+
+	/* ignore non-ipv4 packets */
+	if(nhgets(epkt->type) != ETIP4)
+		return;
+	iphdr = (Iphdr*)(epkt->data);
+	n -= ETHERHDRSIZE;
+	if(n < IPHDR)
+		return;
+
+	/* ignore bad packets */
+	if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
+		hl = (iphdr->vihl&0xF)<<2;
+		if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
+			return;
+	} else
+		hl = IP_HLEN4<<2;
+
+	/* ignore non-tcp packets */
+	if(iphdr->proto != IP_TCPPROTO)
+		return;
+	n -= hl;
+	if(n < (int)sizeof(Tcphdr))
+		return;
+	tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
+	// MSS can only appear in SYN packet
+	if(!(tcphdr->flag[1] & SYN))
+		return;
+	hl = (tcphdr->flag[0] & 0xf0)>>2;	/* tcp header length in bytes */
+	if(n < hl)
+		return;
+
+	// check for MSS option
+	optr = (uchar*)tcphdr + sizeof(Tcphdr);
+	n = hl - (int)sizeof(Tcphdr);	/* bytes of options left */
+	for(;;) {
+		if(n <= 0 || *optr == EOLOPT)
+			return;
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		/* every other option has a length byte; make sure it is there */
+		if(n < 2)
+			return;
+		optlen = optr[1];
+		if(optlen < 2 || optlen > n)
+			return;
+		if(*optr == MSSOPT && optlen == MSS_LENGTH)
+			break;
+		n -= optlen;
+		optr += optlen;
+	}
+
+	mss = nhgets(optr+2);
+	if(mss <= TcpMssMax)
+		return;
+	// fit checksum
+	cksum = nhgets(tcphdr->cksum);
+	if(optr-(uchar*)tcphdr & 1) {
+print("tcpmsshack: odd alignment!\n");
+		// odd alignments are a pain
+		// the MSS straddles two checksum words; patch each in turn
+		cksum += nhgets(optr+1);
+		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
+		cksum += (cksum>>16);
+		cksum &= 0xffff;
+		cksum += nhgets(optr+3);
+		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
+		cksum += (cksum>>16);
+	} else {
+		cksum += mss;
+		cksum -= TcpMssMax;
+		cksum += (cksum>>16);
+	}
+	hnputs(tcphdr->cksum, cksum);
+	hnputs(optr+2, TcpMssMax);
+}
+
+/*
+ * process to read from the ethernet
+ *
+ * One of these kernel processes runs per bound port (started by
+ * portbind with the Port as argument).  It reads packets from
+ * port->data[0] with the bridge unlocked, then, with the bridge
+ * locked, learns the source address, optionally clamps the TCP MSS
+ * and applies the artificial delay, and forwards the packet:
+ * multicast and unknown destinations are flooded, known destinations
+ * on another port are written to that port, and packets for the
+ * port they arrived on are dropped.
+ *
+ * The loop ends on a read error, a nil read, or when the port has
+ * been closed by portunbind; the process then drops its reference to
+ * the port and exits.  A packet that was read while the port was
+ * being closed is freed before leaving.
+ */
+static void
+etherread(void *a)
+{
+	Port *port = a;
+	Bridge *b = port->bridge;
+	Block *bp, *bp2;
+	Etherpkt *ep;
+	Centry *ce;
+	long md;
+
+	qlock(b);
+	port->readp = up;	/* hide identity under a rock for unbind */
+
+	while(!port->closed){
+		// release lock to read - error means it is time to quit
+		qunlock(b);
+		if(waserror()) {
+			print("etherread read error: %s\n", up->errstr);
+			qlock(b);
+			break;
+		}
+		if(0)
+			print("devbridge: etherread: reading\n");
+		bp = devtab[port->data[0]->type]->bread(port->data[0],
+			ETHERMAXTU, 0);
+		if(0)
+			print("devbridge: etherread: blocklen = %d\n",
+				blocklen(bp));
+		poperror();
+		qlock(b);
+		if(bp == nil)
+			break;
+		if(port->closed) {
+			// unbound while we were reading: don't leak the packet
+			freeb(bp);
+			break;
+		}
+		if(waserror()) {
+//			print("etherread bridge error\n");
+			if(bp)
+				freeb(bp);
+			continue;
+		}
+		if(blocklen(bp) < ETHERMINTU)
+			error("short packet");
+		port->in++;
+
+		ep = (Etherpkt*)bp->rp;
+		cacheupdate(b, ep->s, port->id);
+		if(b->tcpmss)
+			tcpmsshack(ep, BLEN(bp));
+
+		/*
+		 * delay packets to simulate a slow link
+		 */
+		if(b->delay0 || b->delayn){
+			md = b->delay0 + b->delayn * BLEN(bp);
+			if(md > 0)
+				microdelay(md);
+		}
+
+		// bp is set to nil whenever ownership passes to a writer, so
+		// neither the error handler nor the code below frees it twice
+		if(ep->d[0] & 1) {
+			log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
+				port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
+			port->inmulti++;
+			bp2 = bp; bp = nil;
+			ethermultiwrite(b, bp2, port);
+		} else {
+			ce = cachelookup(b, ep->d);
+			if(ce == nil) {
+				b->miss++;
+				port->inunknown++;
+				bp2 = bp; bp = nil;
+				ethermultiwrite(b, bp2, port);
+			}else if(ce->port != port->id){
+				b->hit++;
+				bp2 = bp; bp = nil;
+				etherwrite(b->port[ce->port], bp2);
+			}
+		}
+
+		poperror();
+		if(bp)
+			freeb(bp);
+	}
+//	print("etherread: trying to exit\n");
+	port->readp = nil;
+	portfree(port);
+	qunlock(b);
+	pexit("hangup", 1);
+}
+
+/*
+ * Decide whether an n byte ethernet frame should be IP-fragmented
+ * before it is written to a tunnel.  Returns 1 if so, 0 otherwise.
+ *
+ * Only frames longer than TunnelMtu that hold an option-less IPv4
+ * packet without the don't-fragment bit, and whose IP length fits in
+ * the frame, qualify.
+ */
+static int
+fragment(Etherpkt *epkt, int n)
+{
+	Iphdr *ip;
+	int iplen;
+
+	if(n <= TunnelMtu)
+		return 0;
+
+	/* ignore non-ipv4 packets */
+	if(nhgets(epkt->type) != ETIP4)
+		return 0;
+
+	ip = (Iphdr*)(epkt->data);
+	iplen = n - ETHERHDRSIZE;
+
+	/* IP runt packets */
+	if(iplen < IPHDR)
+		return 0;
+	/* bad packets (I don't handle IP options for the moment) */
+	if(ip->vihl != (IP_VER4|IP_HLEN4))
+		return 0;
+	/* packets with don't-fragment set */
+	if(ip->frag[0] & (IP_DF>>8))
+		return 0;
+	/* short blocks */
+	if(nhgets(ip->length) > iplen)
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Write packet bp to port.
+ *
+ * Normally bp is handed straight to the bwrite routine of the port's
+ * output channel.  On a tunnel port, a packet that fragment() accepts
+ * is instead split into IP fragments: each fragment gets a copy of
+ * the ethernet and IP headers, its own fragment offset, length and
+ * header checksum, and is written separately; bp itself is freed at
+ * the end (or on error).
+ * NOTE(review): the block passed to bwrite is presumably owned by the
+ * device from then on - confirm against the device's bwrite.
+ */
+static void
+etherwrite(Port *port, Block *bp)
+{
+ Iphdr *eh, *feh;
+ Etherpkt *epkt;
+ int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
+ Block *xp, *nb;
+ ushort fragoff, frag;
+
+ port->out++;
+ epkt = (Etherpkt*)bp->rp;
+ n = blocklen(bp);
+ if(port->type != Ttun || !fragment(epkt, n)) {
+ devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
+ return;
+ }
+ port->outfrag++;
+ if(waserror()){
+ freeblist(bp);
+ nexterror();
+ }
+
+ // payload bytes per fragment: what fits in TunnelMtu, rounded down to a multiple of 8
+ seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
+ eh = (Iphdr*)(epkt->data);
+ len = nhgets(eh->length);
+ frag = nhgets(eh->frag);
+ mf = frag & IP_MF;
+ // frag becomes the original offset in bytes; the flag bits fall off the top of the ushort
+ frag <<= 3;
+ dlen = len - IPHDR;
+ xp = bp;
+ lid = nhgets(eh->id);
+ // skip the ethernet and IP headers in the source block list
+ offset = ETHERHDRSIZE+IPHDR;
+ while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ // NOTE(review): xp is dereferenced without a nil check here; this relies on the list holding more than the headers - confirm
+ xp->rp += offset;
+
+ if(0)
+ print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
+ seglen, dlen, mf, frag);
+ for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
+ nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
+
+ feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
+
+ // every fragment starts with a copy of the original ethernet + IP header
+ memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
+ nb->wp += ETHERHDRSIZE+IPHDR;
+
+ // the last fragment keeps the original more-fragments bit, all others set it
+ if((fragoff + seglen) >= dlen) {
+ seglen = dlen - fragoff;
+ hnputs(feh->frag, (frag+fragoff)>>3 | mf);
+ }
+ else
+ hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
+
+ hnputs(feh->length, seglen + IPHDR);
+ hnputs(feh->id, lid);
+
+ /* Copy up the data area */
+ chunk = seglen;
+ while(chunk) {
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ // recompute the header checksum over the modified header
+ feh->cksum[0] = 0;
+ feh->cksum[1] = 0;
+ hnputs(feh->cksum, ipcsum(&feh->vihl));
+
+ /* don't generate small packets */
+ if(BLEN(nb) < ETHERMINTU)
+ nb->wp = nb->rp + ETHERMINTU;
+ devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
+ }
+ poperror();
+ freeblist(bp);
+}
+
+/*
+ * Drop one reference to port; on the last one close its data
+ * channels, wipe the structure and free it.
+ * hold b lock
+ */
+static void
+portfree(Port *port)
+{
+	int k;
+
+	if(--port->ref < 0)
+		panic("portfree: bad ref");
+	if(port->ref != 0)
+		return;		// still in use elsewhere
+
+	for(k = 0; k < 2; k++)
+		if(port->data[k])
+			cclose(port->data[k]);
+	memset(port, 0, sizeof(Port));
+	free(port);
+}
+
+/*
+ * Device switch entry for the bridge device, attached as #B.
+ * Operations without a bridge* routine use the generic dev* defaults.
+ * NOTE(review): the slot order is dictated by the Dev structure
+ * declared outside this file - confirm before reordering.
+ */
+Dev bridgedevtab = {
+ 'B',
+ "bridge",
+
+ devreset,
+ bridgeinit,
+ devshutdown,
+ bridgeattach,
+ bridgewalk,
+ bridgestat,
+ bridgeopen,
+ devcreate,
+ bridgeclose,
+ bridgeread,
+ devbread,
+ bridgewrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};