aoe initiator for plan 9 (#æ) and devsd fittings. Reference: /n/sources/patch/applied/aoe-initiator Date: Thu Aug 9 00:01:56 CES 2007 Signed-off-by: quanstro@quanstro.net --- /sys/src/9/port/aoe.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9/port/aoe.h Thu Aug 9 21:42:56 2007 @@ -0,0 +1,69 @@ +/* + * ATA-over-Ethernet (AoE) protocol + */ +enum { + ACata, + ACconfig, +}; + +enum { + AQCread, + AQCtest, + AQCprefix, + AQCset, + AQCfset, +}; + +enum { + AEcmd = 1, + AEarg, + AEdev, + AEcfg, + AEver, +}; + +enum { + Aoetype = 0x88a2, + Aoesectsz = 512, /* assumed sector size */ + Aoever = 1, + + AFerr = 1<<2, + AFrsp = 1<<3, + + AAFwrite= 1, + AAFext = 1<<6, +}; + +typedef struct { + uchar dst[Eaddrlen]; + uchar src[Eaddrlen]; + uchar type[2]; + uchar verflag; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; +} Aoehdr; + +typedef struct { + Aoehdr; + uchar aflag; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; +} Aoeata; + +typedef struct { + Aoehdr; + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; +} Aoeqc; + +extern char Echange[]; +extern char Enotup[]; --- /sys/src/9/port/devaoe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/port/devaoe.c Fri Aug 10 02:15:12 2007 @@ -0,0 +1,2441 @@ +/* + * © 2005-7 coraid + * aoe storage initiator + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "etherif.h" +#include "../ip/ip.h" +#include "../port/aoe.h" + +#pragma varargck type "æ" Aoedev* +#pragma varargck argpos eventlog 1 + +#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug); +#define uprint(...) 
snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); + +enum { + Maxframes = 24, + Ndevlink = 6, + Nea = 6, + Nnetlink = 6, +}; + +#define TYPE(q) ((ulong)(q).path & 0xf) +#define UNIT(q) (((ulong)(q).path>>4) & 0xf) +#define L(q) (((ulong)(q).path>>8) & 0xf) +#define QID(u, t) ((u)<<4 | (t)) +#define Q3(l, u, t) ((l)<<8 | QID(u, t)) +#define UP(d) ((d)->flag & Dup) + +#define Ticks MACHP(0)->ticks +#define Ms2tk(t) (((t)*HZ)/1000) +#define Tk2ms(t) (((t)*1000)/HZ) + +enum { + Qzero, + Qtopdir = 1, + Qtopbase, + Qtopctl = Qtopbase, + Qtoplog, + Qtopend, + + Qunitdir, + Qunitbase, + Qctl = Qunitbase, + Qdata, + Qconfig, + Qident, + + Qdevlinkdir, + Qdevlinkbase, + Qdevlink = Qdevlinkbase, + Qdevlinkend, + + Qtopfiles = Qtopend-Qtopbase, + Qdevlinkfiles = Qdevlinkend-Qdevlinkbase, + + Eventlen = 256, + Nevents = 64, + + Fread = 0, + Fwrite, + Tfree = -1, + Tmgmt, + + /* round trip bounds, timeouts, in ticks */ + Rtmax = Ms2tk(320), + Rtmin = Ms2tk(20), + Srbtimeout = 45*HZ, + Deadtk = 120*HZ, + + Dbcnt = 1024, + + Crd = 0x20, + Crdext = 0x24, + Cwr = 0x30, + Cwrext = 0x34, + Cid = 0xec, +}; + +/* + * unified set of flags + * a Netlink + Aoedev most both be jumbo capable + * to send jumbograms to that interface. + */ +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, + + /* aoe specific */ + Dup = 1<<6, + Djumbo = 1<<7, +}; + +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", + + "up", + "jumbo", +}; + +typedef struct { + uchar flag; + uchar lostjumbo; + int datamtu; + + Chan *cc; + Chan *dc; + Chan *mtu; /* open early to prevent bind issues. 
*/ + char path[Maxpath]; + uchar ea[Eaddrlen]; +} Netlink; + +typedef struct { + Netlink *nl; + int nea; + ulong eaidx; + uchar eatab[Nea][Eaddrlen]; + ulong npkt; + ulong resent; + uchar flag; + + ulong rttavg; + ulong mintimer; +} Devlink; + +typedef struct Srb Srb; +struct Srb { + Rendez; + Srb *next; + ulong ticksent; + ulong len; + vlong sector; + short write; + short nout; + char *error; + void *dp; + void *data; +}; + +typedef struct { + int tag; + ulong bcnt; + vlong lba; + ulong ticksent; + int nhdr; + uchar hdr[ETHERMINTU]; + void *dp; + Devlink *dl; + Netlink *nl; + int eaidx; + Srb *srb; +} Frame; + +typedef struct Aoedev Aoedev; +struct Aoedev { + QLock; + Aoedev *next; + + ulong vers; + + int ndl; + ulong dlidx; + Devlink *dl; + Devlink dltab[Ndevlink]; + + ushort fwver; + uchar flag; + int nopen; + int major; + int minor; + int unit; + int lasttag; + int nframes; + Frame *frames; + vlong bsize; + vlong realbsize; + + uint maxbcnt; + ulong lostjumbo; + ushort nout; + ushort maxout; + ulong lastwadj; + Srb *head; + Srb *tail; + Srb *inprocess; + + /* magic numbers 'R' us */ + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + int nconfig; + uchar config[1024]; + uchar ident[512]; +}; + +static struct { + Lock; + QLock; + Rendez; + char buf[Eventlen*Nevents]; + char *rp; + char *wp; +} events; + +static struct { + RWlock; + int nd; + Aoedev *d; +} devs; + +static struct { + Lock; + int reader[Nnetlink]; /* reader is running. */ + Rendez rendez[Nnetlink]; /* confirm exit. 
*/ + Netlink nl[Nnetlink]; +} netlinks; + +extern Dev aoedevtab; +static Ref units; +static int debug; +static int autodiscover = 1; +static int rediscover; + +char Enotup[] = "aoe device is down"; +char Echange[] = "media or partition has changed"; + +static Srb* +srballoc(ulong sz) +{ + Srb *srb; + + srb = malloc(sizeof *srb+sz); + srb->dp = srb->data = srb+1; + srb->ticksent = Ticks; + return srb; +} + +static Srb* +srbkalloc(void *db, ulong) +{ + Srb *srb; + + srb = malloc(sizeof *srb); + srb->dp = srb->data = db; + srb->ticksent = Ticks; + return srb; +} + +#define srbfree(srb) free(srb) + +static void +srberror(Srb *srb, char *s) +{ + srb->error = s; + srb->nout--; + wakeup(srb); +} + +static void +frameerror(Aoedev *d, Frame *f, char *s) +{ + Srb *srb; + + srb = f->srb; + if(f->tag == Tfree || !srb) + return; + f->srb = nil; + f->tag = Tfree; /* don't get fooled by way-slow responses */ + srberror(srb, s); + d->nout--; +} + +static char* +unitname(Aoedev *d) +{ + uprint("%d.%d", d->major, d->minor); + return up->genbuf; +} + +static int +eventlogready(void*) +{ + return *events.rp; +} + +static long +eventlogread(void *a, long n) +{ + int len; + char *p, *buf; + + buf = smalloc(Eventlen); + qlock(&events); + lock(&events); + p = events.rp; + len = *p; + if(len == 0){ + n = 0; + unlock(&events); + } else { + if(n > len) + n = len; + /* can't move directly into pageable space with events lock held */ + memmove(buf, p+1, n); + *p = 0; + events.rp = p += Eventlen; + if(p >= events.buf + sizeof events.buf) + events.rp = events.buf; + unlock(&events); + + /* the concern here is page faults in memmove below */ + if(waserror()){ + free(buf); + qunlock(&events); + nexterror(); + } + memmove(a, buf, n); + poperror(); + } + free(buf); + qunlock(&events); + return n; +} + +static int +eventlog(char *fmt, ...) 
+{
+	int dragrp, n;
+	char *p;
+	va_list arg;
+
+	/*
+	 * each Eventlen-byte slot is a length byte followed by the text.
+	 * a non-zero length in the slot about to be written means the
+	 * ring is full: the oldest entry is overwritten and the read
+	 * pointer is dragged forward to the next slot.
+	 */
+	lock(&events);
+	p = events.wp;
+	dragrp = *p++;
+	va_start(arg, fmt);
+	n = vsnprint(p, Eventlen-1, fmt, arg);
+	va_end(arg);
+	*--p = n;
+	p = events.wp += Eventlen;
+	if(p >= events.buf + sizeof events.buf)
+		p = events.wp = events.buf;
+	if(dragrp)
+		events.rp = p;
+	unlock(&events);
+	wakeup(&events);
+	return n;
+}
+
+/*
+ * number of events currently queued in the ring.
+ * rp and wp are byte pointers into events.buf, so the distance is
+ * computed in bytes and only then scaled down by Eventlen.
+ */
+static int
+eventcount(void)
+{
+	int n;
+
+	lock(&events);
+	if(*events.rp == 0)
+		n = 0;		/* slot at rp is empty: nothing queued */
+	else if(events.wp <= events.rp)
+		/* writer has wrapped, or caught up with the reader (ring full) */
+		n = sizeof events.buf - (events.rp - events.wp);
+	else
+		n = events.wp - events.rp;
+	unlock(&events);
+	return n/Eventlen;
+}
+
+/*
+ * ticks elapsed since tag was issued, modulo 2^16.
+ * newtag stores the low 16 bits of Ticks in the low half of the tag.
+ */
+static int
+tsince(int tag)
+{
+	int n;
+
+	n = Ticks & 0xffff;
+	n -= tag & 0xffff;
+	if(n < 0)
+		n += 1<<16;
+	return n;
+}
+
+/*
+ * make a fresh tag for d: a per-device sequence number in the high
+ * half and the current tick count in the low half.  the reserved
+ * values Tfree and Tmgmt are never returned.
+ */
+static int
+newtag(Aoedev *d)
+{
+	int t;
+
+	do {
+		t = ++d->lasttag << 16;
+		t |= Ticks & 0xffff;
+	} while (t == Tfree || t == Tmgmt);
+	return t;
+}
+
+/*
+ * mark d down: fail every outstanding frame with Enotup, release all
+ * frames, drop the request in progress and log the reason.
+ */
+static void
+downdev(Aoedev *d, char *err)
+{
+	Frame *f, *e;
+
+	d->flag &= ~Dup;
+	f = d->frames;
+	e = f + d->nframes;
+	for(; f < e; f->tag = Tfree, f->srb = nil, f++)
+		frameerror(d, f, Enotup);
+	d->inprocess = nil;
+	eventlog("%æ: removed; %s\n", d, err);
+}
+
+/*
+ * build a Block holding the frame's header followed by dlen bytes of
+ * its data.  the block is padded out to ETHERMINTU if shorter.
+ */
+static Block*
+allocfb(Frame *f, int dlen)
+{
+	int len;
+	Block *b;
+
+	len = f->nhdr + dlen;
+	if(len < ETHERMINTU)
+		len = ETHERMINTU;
+	b = allocb(len);
+	memmove(b->wp, f->hdr, f->nhdr);
+	if(dlen)
+		memmove(b->wp + f->nhdr, f->dp, dlen);
+	b->wp += len;
+	return b;
+}
+
+/* store the low 48 bits of lba into the header, least significant byte first */
+static void
+putlba(Aoeata *a, vlong lba)
+{
+	uchar *c;
+
+	c = a->lba;
+	c[0] = lba;
+	c[1] = lba >> 8;
+	c[2] = lba >> 16;
+	c[3] = lba >> 24;
+	c[4] = lba >> 32;
+	c[5] = lba >> 40;
+}
+
+/*
+ * round-robin over the device's links; return the next one that is
+ * up, or nil (0) if none is.
+ */
+static Devlink*
+pickdevlink(Aoedev *d)
+{
+	ulong i, n;
+	Devlink *l;
+
+	for(i = 0; i < d->ndl; i++){
+		n = d->dlidx++ % d->ndl;
+		l = d->dl + n;
+		if(l && l->flag & Dup)
+			return l;
+	}
+	return 0;
+}
+
+/*
+ * round-robin over the link's target addresses; return an index into
+ * l->eatab, or -1 if the link has none.  l must not be nil.
+ */
+static int
+pickea(Devlink *l)
+{
+	if(l->nea == 0)
+		return -1;
+	return l->eaidx++ % l->nea;
+}
+
+static int
+hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
+{
+	int i;
+	Devlink *l;
+
+	/*
+	 * fill in the common aoe header for frame f and stamp it with a
+	 * fresh tag.  returns the tag, or -1 after failing the frame when
+	 * no link/address is usable or the owning srb has timed out.
+	 *
+	 * pickdevlink returns nil when no link is up and pickea
+	 * dereferences its argument, so only ask for an address once
+	 * we know we have a link.
+	 */
+	l = pickdevlink(d);
+	i = -1;
+	if(l != nil)
+		i = pickea(l);
+	if(l == nil || i == -1 /* || (d->flag&Dup) == 0 */ ){
+		eventlog("%æ: resend fails. no netlink/no ea\n", d);
+		d->flag &= ~Dup;
+		frameerror(d, f, Enotup);
+		return -1;
+	}
+	if(f->srb && Ticks-f->srb->ticksent > Srbtimeout){
+		eventlog("%æ: srb timeout\n", d);
+		frameerror(d, f, Etimedout);
+		return -1;
+	}
+	/*
+	 * eatab is a table of Eaddrlen-byte addresses: index the row.
+	 * (adding Eaddrlen*i to the table pointer steps whole rows and
+	 * lands on the wrong entry for any i > 0.)
+	 */
+	memmove(h->dst, l->eatab[i], Eaddrlen);
+	memmove(h->src, l->nl->ea, sizeof h->src);
+	hnputs(h->type, Aoetype);
+	h->verflag = Aoever << 4;
+	h->error = 0;
+	hnputs(h->major, d->major);
+	h->minor = d->minor;
+	h->cmd = cmd;
+
+	hnputl(h->tag, f->tag = newtag(d));
+	f->dl = l;
+	f->nl = l->nl;
+	f->eaidx = i;
+	f->ticksent = Ticks;
+
+	return f->tag;
+}
+
+/*
+ * retransmit frame f with a new tag, link and target address.
+ * the payload is trimmed to the device's current maxbcnt.
+ * returns 0, or -1 if hset could not address the frame.
+ */
+static int
+resend(Aoedev *d, Frame *f)
+{
+	unsigned n;
+	Aoeata *a;
+
+	a = (Aoeata*)f->hdr;
+	if(hset(d, f, a, a->cmd) == -1)
+		return -1;
+	n = f->bcnt;
+	if(n > d->maxbcnt)
+		n = d->maxbcnt;		/* mtu mismatch (jumbo fail?) */
+	a->scnt = n / Aoesectsz;
+	f->dl->resent++;
+	f->dl->npkt++;
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f, n), 0);
+	return 0;
+}
+
+/*
+ * broadcast a config query for major.minor on every bound interface.
+ */
+static void
+discover(int major, int minor)
+{
+	Aoehdr *h;
+	Block *b;
+	Netlink *nl, *e;
+
+	nl = netlinks.nl;
+	e = nl + nelem(netlinks.nl);
+	for(; nl < e; nl++){
+		if(nl->cc == nil)
+			continue;	/* slot not in use */
+		b = allocb(ETHERMINTU);
+		if(waserror()){
+			freeb(b);
+			nexterror();
+		}
+		b->wp = b->rp + ETHERMINTU;
+		memset(b->rp, 0, ETHERMINTU);
+		h = (Aoehdr*)b->rp;
+		memset(h->dst, 0xff, sizeof h->dst);	/* ethernet broadcast */
+		memmove(h->src, nl->ea, sizeof h->src);
+		hnputs(h->type, Aoetype);
+		h->verflag = Aoever << 4;
+		hnputs(h->major, major);
+		h->minor = minor;
+		h->cmd = ACconfig;
+		poperror();
+		devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
+	}
+}
+
+/*
+ * Check all frames on device and resend any frames that have been
+ * outstanding for 200% of the device round trip time average. 
+ */
+static void
+aoesweep(void*)
+{
+	ulong i, tx, timeout, nbc;
+	long ms;
+	vlong starttick;
+	enum { Nms = 100, Nbcms = 30*1000, };
+	uchar *ea;
+	Aoeata *a;
+	Aoedev *d;
+	Devlink *l;
+	Frame *f, *e;
+
+	/* nbc counts sweeps down to the next rediscovery broadcast */
+	nbc = Nbcms/Nms;
+loop:
+	if(nbc-- == 0){
+		if(rediscover)
+			discover(0xffff, 0xff);
+		nbc = Nbcms/Nms;
+	}
+	starttick = Ticks;
+	rlock(&devs);
+	for(d = devs.d; d; d = d->next){
+		/* never block here; a busy device is caught on the next sweep */
+		if(!canqlock(d))
+			continue;
+		if(!UP(d)){
+			qunlock(d);
+			continue;
+		}
+		tx = 0;
+		f = d->frames;
+		e = f + d->nframes;
+		for (; f < e; f++){
+			if(f->tag == Tfree)
+				continue;
+			l = f->dl;
+			timeout = l->rttavg << 1;
+			i = tsince(f->tag);
+			if(i < timeout)
+				continue;
+			/* a timeout with the window full: shrink the window */
+			if(d->nout == d->maxout){
+				if(d->maxout > 1)
+					d->maxout--;
+				d->lastwadj = Ticks;
+			}
+			a = (Aoeata*)f->hdr;
+			if(a->scnt > Dbcnt / Aoesectsz &&
+			   ++f->nl->lostjumbo > (d->nframes << 1)){
+				/*
+				 * report the target address the frame was sent
+				 * to.  nl->ea is the single local address, so
+				 * offsetting it by eaidx runs off its end.
+				 */
+				ea = f->dl->eatab[f->eaidx];
+				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
+					d, f->nl->path, ea, f->lba);
+				d->maxbcnt = Dbcnt;
+				d->flag &= ~Djumbo;
+			}
+			resend(d, f);
+			/* back off the rtt estimate once per device per sweep */
+			if(tx++ == 0){
+				if((l->rttavg <<= 1) > Rtmax)
+					l->rttavg = Rtmax;
+				eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
+			}
+		}
+		/* window full and quiet for 10s: open it by one */
+		if(d->nout == d->maxout && d->maxout < d->nframes &&
+		   TK2MS(Ticks-d->lastwadj) > 10*1000){
+			d->maxout++;
+			d->lastwadj = Ticks;
+		}
+		qunlock(d);
+	}
+	runlock(&devs);
+	/*
+	 * sleep out the rest of the Nms period.  the remainder must be
+	 * signed: if the sweep overran, an unsigned value wraps to a
+	 * huge number and the sweeper would sleep (almost) forever.
+	 */
+	ms = Nms - TK2MS(Ticks - starttick);
+	if(ms > 0)
+		tsleep(&up->sleep, return0, 0, ms);
+	goto loop;
+}
+
+/*
+ * %æ: format an Aoedev* as "aoeMAJOR.MINOR".
+ * the buffer must hold the longest name the wire format allows,
+ * "aoe65535.255" (16-bit major, 8-bit minor), plus the terminator.
+ */
+static int
+fmtæ(Fmt *f)
+{
+	char buf[16];
+	Aoedev *d;
+
+	d = va_arg(f->args, Aoedev*);
+	snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
+	return fmtstrcpy(f, buf);
+}
+
+static void netbind(char *path);
+
+static void
+aoecfg(void)
+{
+	int n, i;
+	char *p, *f[32], buf[24];
+
+	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
+		return;
+	/* goo! 
*/ + for(i = 0; i < n; i++){ + p = f[i]; + if(strncmp(p, "ether", 5) == 0) + snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]); + else if(strncmp(p, "#l", 2) == 0) + snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]); + else + continue; + netbind(buf); + } +} + +static void +aoeinit(void) +{ + static int init; + static QLock l; + + if(!canqlock(&l)) + return; + if(init++ > 0){ + qunlock(&l); + return; + } + + fmtinstall(L'æ', fmtæ); + events.rp = events.wp = events.buf; + kproc("aoesweep", aoesweep, nil); + aoecfg(); + + qunlock(&l); +} + +static Chan* +aoeattach(char *spec) +{ + Chan *c; + + if(*spec) + error(Enonexist); + + aoeinit(); + c = devattach(L'æ', spec); + mkqid(&c->qid, Qzero, 0, QTDIR); + + return c; +} + +static Aoedev* +unit2dev(ulong unit) +{ + int i; + Aoedev *d; + + rlock(&devs); + i = 0; + for(d = devs.d; d; d = d->next) + if(i++ == unit){ + runlock(&devs); + return d; + } + runlock(&devs); + uprint("unit lookup failure: %lux pc %p", unit, getcallerpc(&unit)); + error(up->genbuf); +// error("unit lookup failure"); + return nil; +} + +static int +unitgen(Chan *c, ulong type, Dir *dp) +{ + int perm, t; + ulong vers; + vlong size; + char *p; + Aoedev *d; + Qid q; + + d = unit2dev(UNIT(c->qid)); + perm = 0666; + size = 0; + vers = d->vers; + t = QTFILE; + + switch(type){ + default: + return -1; + case Qctl: + p = "ctl"; + break; + case Qdata: + p = "data"; + if(UP(d)) + size = d->bsize; + break; + case Qconfig: + p = "config"; + if(UP(d)) + size = d->nconfig; + break; + case Qident: + p = "ident"; + if(UP(d)) + size = sizeof d->ident; + break; + case Qdevlinkdir: + p = "devlink"; + t = QTDIR; + perm = 0555; + break; + } + mkqid(&q, QID(UNIT(c->qid), type), vers, t); + devdir(c, q, p, size, eve, perm, dp); + return 1; +} + +static int +topgen(Chan *c, ulong type, Dir *d) +{ + int perm; + vlong size; + char *p; + Qid q; + + perm = 0444; + size = 0; + switch(type){ + default: + return -1; + case Qtopctl: + p = "ctl"; + perm = 0644; + break; + case 
Qtoplog: + p = "log"; + size = eventcount(); + break; + } + mkqid(&q, type, 0, QTFILE); + devdir(c, q, p, size, eve, perm, d); + return 1; +} + +static int +aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp) +{ + int i; + Aoedev *d; + Qid q; + + if(c->qid.path == 0){ + switch(s){ + case DEVDOTDOT: + q.path = 0; + q.type = QTDIR; + devdir(c, q, "#æ", 0, eve, 0555, dp); + break; + case 0: + q.path = Qtopdir; + q.type = QTDIR; + devdir(c, q, "aoe", 0, eve, 0555, dp); + break; + default: + return -1; + } + return 1; + } + + switch(TYPE(c->qid)){ + default: + return -1; + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, Qzero, 0, QTDIR); + devdir(c, q, "aoe", 0, eve, 0555, dp); + return 1; + } + if(s < Qtopfiles) + return topgen(c, Qtopbase + s, dp); + s -= Qtopfiles; + if(s >= units.ref) + return -1; + mkqid(&q, QID(s, Qunitdir), 0, QTDIR); + d = unit2dev(s); + devdir(c, q, unitname(d), 0, eve, 0555, dp); + return 1; + case Qtopctl: + case Qtoplog: + return topgen(c, TYPE(c->qid), dp); + case Qunitdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, Qtopdir), 0, QTDIR); + uprint("%uld", UNIT(c->qid)); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + return unitgen(c, Qunitbase+s, dp); + case Qctl: + case Qdata: + case Qconfig: + return unitgen(c, TYPE(c->qid), dp); + case Qdevlinkdir: + i = UNIT(c->qid); + if(s == DEVDOTDOT){ + mkqid(&q, QID(i, Qunitdir), 0, QTDIR); + devdir(c, q, "devlink", 0, eve, 0555, dp); + return 1; + } + if(i >= units.ref) + return -1; + d = unit2dev(i); + if(s >= d->ndl) + return -1; + uprint("%d", s); + mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + return 1; + case Qdevlink: + uprint("%d", s); + mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + return 1; + } +} + +static Walkqid* +aoewalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, aoegen); +} + +static int +aoestat(Chan *c, uchar *db, int n) 
+{ + return devstat(c, db, n, nil, 0, aoegen); +} + +static Chan* +aoeopen(Chan *c, int omode) +{ + Aoedev *d; + + if(TYPE(c->qid) != Qdata) + return devopen(c, omode, 0, 0, aoegen); + + d = unit2dev(UNIT(c->qid)); + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + if(!UP(d)) + error(Enotup); + c = devopen(c, omode, 0, 0, aoegen); + d->nopen++; + poperror(); + qunlock(d); + return c; +} + +static void +aoeclose(Chan *c) +{ + Aoedev *d; + + if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0) + return; + + d = unit2dev(UNIT(c->qid)); + qlock(d); + if(--d->nopen == 0 && !waserror()){ + discover(d->major, d->minor); + poperror(); + } + qunlock(d); +} + +static void +atarw(Aoedev *d, Frame *f) +{ + long n; + ulong bcnt; + char extbit, writebit; + Aoeata *ah; + Srb *srb; + + extbit = 0x4; + writebit = 0x10; + + srb = d->inprocess; + bcnt = d->maxbcnt; + if(bcnt == 0) + bcnt = ETHERMINTU; + if(bcnt > srb->len) + bcnt = srb->len; + f->nhdr = sizeof *ah; + memset(f->hdr, 0, f->nhdr); + ah = (Aoeata*)f->hdr; + if(hset(d, f, ah, ACata) == -1) + return; + f->dp = srb->dp; + f->bcnt = bcnt; + f->lba = srb->sector; + f->srb = srb; + + ah->scnt = bcnt / Aoesectsz; + putlba(ah, f->lba); + if(d->flag & Dllba) + ah->aflag |= AAFext; + else { + extbit = 0; + ah->lba[3] &= 0x0f; + ah->lba[3] |= 0xe0; /* LBA bit+obsolete 0xa0 */ + } + if(srb->write){ + ah->aflag |= AAFwrite; + n = bcnt; + }else{ + writebit = 0; + n = 0; + } + ah->cmdstat = 0x20 | writebit | extbit; + + /* mark tracking fields and load out */ + srb->nout++; + srb->dp = (uchar*)srb->dp + bcnt; + srb->len -= bcnt; + srb->sector += bcnt / Aoesectsz; + if(srb->len == 0) + d->inprocess = nil; + d->nout++; + f->dl->npkt++; + if(waserror()){ + f->tag = Tfree; + d->inprocess = nil; + nexterror(); + } + devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f, n), 0); + poperror(); +} + +static char* +aoeerror(Aoehdr *h) +{ + int n; + static char *errs[] = { + "aoe protocol error: unknown", + "aoe protocol error: bad 
command code",
+		"aoe protocol error: bad argument param",
+		"aoe protocol error: device unavailable",
+		"aoe protocol error: config string present",
+		"aoe protocol error: unsupported version",
+	};
+
+	if((h->verflag & AFerr) == 0)
+		return 0;
+	/*
+	 * h->error comes off the wire; anything outside the table,
+	 * including n == nelem(errs), maps to the "unknown" entry.
+	 */
+	n = h->error;
+	if(n >= nelem(errs))
+		n = 0;
+	return errs[n];
+}
+
+/*
+ * fold a round trip sample (in ticks) into the link's running average.
+ * a negative rtt is also used to move mintimer halfway toward the
+ * (clamped) magnitude; a positive one is clamped to [mintimer, Rtmax].
+ */
+static void
+rtupdate(Devlink *l, int rtt)
+{
+	int n;
+
+	n = rtt;
+	if(rtt < 0){
+		n = -rtt;
+		if(n < Rtmin)
+			n = Rtmin;
+		else if(n > Rtmax)
+			n = Rtmax;
+		l->mintimer += (n - l->mintimer) >> 1;
+	} else if(n < l->mintimer)
+		n = l->mintimer;
+	else if(n > Rtmax)
+		n = Rtmax;
+
+	/* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
+	n -= l->rttavg;
+	l->rttavg += n >> 2;
+}
+
+/* sleep condition: the srb failed, or nothing is outstanding and nothing is left to send */
+static int
+srbready(void *v)
+{
+	Srb *s;
+
+	s = v;
+	return s->error || (!s->nout && !s->len);
+}
+
+/* find the frame carrying tag (Tfree finds an unused frame); nil if none */
+static Frame*
+getframe(Aoedev *d, int tag)
+{
+	Frame *f, *e;
+
+	f = d->frames;
+	e = f + d->nframes;
+	for(; f < e; f++)
+		if(f->tag == tag)
+			return f;
+	return nil;
+}
+
+/* an unused frame, or nil if the send window (maxout) is already full */
+static Frame*
+freeframe(Aoedev *d)
+{
+	if(d->nout < d->maxout)
+		return getframe(d, Tfree);
+	return nil;
+}
+
+/*
+ * push out as much queued i/o as the window allows: while a frame is
+ * free, take the next srb off the queue when none is in process and
+ * send its next piece.
+ */
+static void
+work(Aoedev *d)
+{
+	Frame *f;
+
+	while ((f = freeframe(d)) != nil) {
+		if(d->inprocess == nil){
+			if(d->head == nil)
+				return;
+			d->inprocess = d->head;
+			d->head = d->head->next;
+			if(d->head == nil)
+				d->tail = nil;
+		}
+		atarw(d, f);
+	}
+}
+
+/*
+ * queue srb on d, start what can be started, then wait for srbready.
+ */
+static void
+strategy(Aoedev *d, Srb *srb)
+{
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	srb->next = nil;
+	if(d->tail)
+		d->tail->next = srb;
+	d->tail = srb;
+	if(d->head == nil)
+		d->head = srb;
+	work(d);
+	poperror();
+	qunlock(d);
+
+	/* an error raised out of sleep lands back here and the sleep is retried */
+	while(waserror())
+		;
+	sleep(srb, srbready, srb);
+	poperror();
+}
+
+#define iskaddr(a)	((uintptr)(a) > KZERO)
+
+static long
+rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
+{
+	long n, nlen, copy;
+	enum { Srbsz = 1<<18, };
+	Srb *srb;
+
+	if((off|len) & (Aoesectsz-1))
+		error("offset and length must be sector multiple.\n");
+	if(off > 
d->bsize) + return 0; + if(off + len > d->bsize) + len = d->bsize - off; + copy = 0; + if(iskaddr(db)){ + srb = srbkalloc(db, len); + copy = 1; + }else + srb = srballoc(Srbsz <= len? Srbsz: len); + if(waserror()){ + srbfree(srb); + nexterror(); + } + nlen = len; + srb->write = write; + for (;;) { + if(!UP(d)) + error(Eio); + srb->sector = off / Aoesectsz; + srb->dp = srb->data; + n = nlen; + if(n > Srbsz) + n = Srbsz; + srb->len = n; + if(write == 1 && copy == 0) + memmove(srb->data, db, n); + strategy(d, srb); + if(srb->error) + error(srb->error); + if(write == 0 && copy == 0) + memmove(db, srb->data, n); + nlen -= n; + if(nlen == 0) + break; + db += n; + off += n; + } + poperror(); + srbfree(srb); + return len; +} + +static long +readmem(ulong off, void *dst, long n, void *src, long size) +{ + if(off >= size) + return 0; + if(off + n > size) + n = size - off; + memmove(dst, (uchar*)src + off, n); + return n; +} + +static char * +pflag(char *s, char *e, uchar f) +{ + uchar i, m; + + for(i = 0; i < 8; i++){ + m = 1 << i; + if(f & m) + s = seprint(s, e, "%s ", flagname[i]? 
flagname[i]: "oops"); + } + return seprint(s, e, "\n"); +} + +static int +pstat(Aoedev *d, char *db, int len, int off) +{ + int i; + char *state, *s, *p, *e; + + s = p = malloc(1024); + e = p + 1024; + + state = "down"; + if(UP(d)) + state = "up"; + + p = seprint(p, e, + "state: %s\n" "nopen: %d\n" "nout: %d\n" + "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d\n" + "fw: %d\n" + "model: %s\n" "serial: %s\n" "firmware: %s\n", + state, d->nopen, d->nout, + d->maxout, d->nframes, d->maxbcnt, + d->fwver, + d->model, d->serial, d->firmware); + p = seprint(p, e, "flag: "); + p = pflag(p, e, d->flag); + + if(p - s < len) + len = p - s; + i = readstr(off, db, len, s); + free(s); + return i; +} + +static long +unitread(Chan *c, void *db, long len, vlong off) +{ + Aoedev *d; + + d = unit2dev(UNIT(c->qid)); + if(d->vers != c->qid.vers) + error(Echange); + switch(TYPE(c->qid)){ + default: + error(Ebadarg); + case Qctl: + return pstat(d, db, len, off); + case Qdata: + return rw(d, 0, db, len, off); + case Qconfig: + if (!UP(d)) + error(Enotup); + return readmem(off, db, len, d->config, d->nconfig); + case Qident: + if (!UP(d)) + error(Enotup); + return readmem(off, db, len, d->ident, sizeof d->ident); + } +} + +static int +devlinkread(Chan *c, void *db, int len, int off) +{ + int i; + char *s, *p, *e; + Aoedev *d; + Devlink *l; + + d = unit2dev(UNIT(c->qid)); + i = L(c->qid); + if(i >= d->ndl) + return 0; + l = d->dl + i; + + s = p = malloc(1024); + e = s + 1024; + + p = seprint(p, e, "addr: "); + for(i = 0; i < l->nea; i++) + p = seprint(p, e, "%E ", l->eatab[i]); + p = seprint(p, e, "\n"); + p = seprint(p, e, "npkt: %uld\n", l->npkt); + p = seprint(p, e, "resent: %uld\n", l->resent); + p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag); + p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg)); + p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer)); + + p = seprint(p, e, "nl path: %s\n", l->nl->path); + p = seprint(p, e, "nl ea: %E\n", l->nl->ea); + p = seprint(p, e, "nl 
flag: "); p = pflag(p, e, l->nl->flag);	/* the netlink's flags, not the devlink's again */
+	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * read of the top-level ctl file: the global switches followed by one
+ * stanza per bound interface.
+ */
+static long
+topctlread(Chan *, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Netlink *n;
+
+	s = p = malloc(1024);
+	if(s == nil)
+		error(Enomem);
+	e = s + 1024;
+
+	p = seprint(p, e, "debug: %d\n", debug);
+	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+	p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+	for(i = 0; i < Nnetlink; i++){
+		n = netlinks.nl+i;
+		if(n->cc == 0)
+			continue;	/* slot not in use */
+		p = seprint(p, e, "if%d path: %s\n", i, n->path);
+		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+		/* decimal, matching the same fields in the devlink file */
+		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+		p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
+	}
+
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/* read entry point: dispatch on the file type encoded in the qid */
+static long
+aoeread(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	default:
+		error(Eperm);
+	case Qzero:
+	case Qtopdir:
+	case Qunitdir:
+	case Qdevlinkdir:
+		return devdirread(c, db, n, 0, 0, aoegen);
+	case Qtopctl:
+		return topctlread(c, db, n, off);
+	case Qtoplog:
+		return eventlogread(db, n);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitread(c, db, n, off);
+	case Qdevlink:
+		return devlinkread(c, db, n, off);
+	}
+}
+
+/*
+ * send a new config string (len bytes at db) to the target and, once
+ * it answers, record the string in d->config.
+ */
+static long
+configwrite(Aoedev *d, void *db, long len)
+{
+	char *s;
+	Aoeqc *ch;
+	Frame *f;
+	Srb *srb;
+
+	if(!UP(d))
+		error(Enotup);
+	if(len > ETHERMAXTU-sizeof *ch)
+		error(Etoobig);
+	srb = srballoc(len);
+	s = malloc(len);
+	if(s == nil){
+		srbfree(srb);
+		error(Enomem);
+	}
+	memmove(s, db, len);
+	if(waserror()){
+		srbfree(srb);
+		free(s);
+		nexterror();
+	}
+	/* poll for a free frame; the loop exits with d locked and a second error label pushed */
+	for (;;) {
+		qlock(d);
+		if(waserror()){
+			qunlock(d);
+			nexterror();
+		}
+		f = freeframe(d);
+		if(f != nil)
+			break;
+		poperror();
+		qunlock(d);
+		tsleep(&up->sleep, return0, 
0, 100);
+	}
+	f->nhdr = sizeof *ch;
+	memset(f->hdr, 0, f->nhdr);
+	ch = (Aoeqc*)f->hdr;
+	/*
+	 * d is locked and two error labels are outstanding here, so a
+	 * plain return would leave the device locked forever and leak
+	 * srb and s.  raise instead: the handlers unlock and free.
+	 */
+	if(hset(d, f, ch, ACconfig) == -1)
+		error(Enotup);
+	f->srb = srb;
+	f->dp = s;
+	ch->verccmd = AQCfset;
+	hnputs(ch->cslen, len);
+	d->nout++;
+	srb->nout++;
+	f->dl->npkt++;
+	/*
+	 * these refer to qlock & waserror in the above for loop.
+	 * there's still the first waserror outstanding.
+	 */
+	poperror();
+	qunlock(d);
+
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f, len), 0);
+	sleep(srb, srbready, srb);
+	if(srb->error)
+		error(srb->error);
+
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	memmove(d->config, s, len);
+	d->nconfig = len;
+	poperror();
+	qunlock(d);
+
+	poperror();			/* pop first waserror */
+
+	srbfree(srb);
+	memmove(db, s, len);
+	free(s);
+	return len;
+}
+
+static int getmtu(Chan*);
+
+/*
+ * largest data payload, in bytes, that fits every link of d that is
+ * up on an interface that is up: the smallest interface mtu less the
+ * aoe ata header.  0 when no such link exists or the mtu is too small
+ * to carry the header; never negative, since callers keep the result
+ * in unsigned variables.
+ */
+static int
+devmaxdata(Aoedev *d)
+{
+	int i, m, mtu;
+	Devlink *l;
+	Netlink *n;
+
+	mtu = 100000;
+	for(i = 0; i < d->ndl; i++){
+		l = d->dl + i;
+		n = l->nl;
+		if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
+			continue;
+		m = getmtu(n->mtu);
+		if(m < mtu)
+			mtu = m;
+	}
+	if(mtu == 100000)
+		return 0;
+	mtu -= sizeof(Aoeata);
+	if(mtu < 0)
+		mtu = 0;
+	return mtu;
+}
+
+/*
+ * new value for an on/off switch: with no argument flip init,
+ * otherwise 1 for "on" and 0 for anything else.
+ */
+static int
+toggle(char *s, int init)
+{
+	if(s == nil)
+		return init ^ 1;
+	return strcmp(s, "on") == 0;
+}
+
+static void ataident(Aoedev*);
+
+static long
+unitctlwrite(Aoedev *d, void *db, long n)
+{
+	uint maxbcnt, m;
+	uvlong bsize;
+	enum {
+		Failio,
+		Ident,
+		Jumbo,
+		Maxbno,
+		Mtu,
+		Setsize,
+	};
+	Cmdbuf *cb;
+	Cmdtab *ct, cmds[] = {
+		{Failio, 	"failio", 	1 },
+		{Ident, 	"identify", 	1 },
+		{Jumbo, 	"jumbo", 	0 },
+		{Maxbno,	"maxbno",	0 },
+		{Mtu,		"mtu",		0 },
+		{Setsize, 	"setsize", 	0 },
+	};
+
+	cb = parsecmd(db, n);
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, cmds, nelem(cmds));
+	switch(ct->index){
+	case Failio:
+		downdev(d, "i/o failure");
+		break;
+	case Ident:
+		ataident(d);
+		break;
+	case Jumbo:
+		m = 0;
+		if(d->flag & Djumbo)
+			m = 1;
+		
m = toggle(cb->f[1], m);	/* the new state is toggle's return value */
+		if(m)
+			d->flag |= Djumbo;
+		else
+			d->flag &= ~Djumbo;
+		break;
+	case Maxbno:
+	case Mtu:
+		/* no argument: reset to what the media allows */
+		maxbcnt = devmaxdata(d);
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			m = strtoul(cb->f[1], 0, 0);
+			if(ct->index == Maxbno)
+				m *= Aoesectsz;		/* argument is in sectors */
+			else{
+				/* argument is a frame size: strip header, round down to sectors */
+				m -= sizeof(Aoeata);
+				m &= ~(Aoesectsz-1);
+			}
+			if(m > maxbcnt)
+				cmderror(cb, "maxb greater than media mtu");
+			maxbcnt = m;
+		}
+		d->maxbcnt = maxbcnt;
+		break;
+	case Setsize:
+		/* no argument: restore the size the drive reported */
+		bsize = d->realbsize;
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			bsize = strtoull(cb->f[1], 0, 0);
+			if(bsize % Aoesectsz)
+				cmderror(cb, "disk size must be sector aligned");
+		}
+		d->bsize = bsize;
+		break;
+	default:
+		cmderror(cb, "unknown aoe control message");
+	}
+	poperror();
+	qunlock(d);
+	free(cb);
+	return n;
+}
+
+/* write to one of a unit's files: dispatch on the file type in the qid */
+static long
+unitwrite(Chan *c, void *db, long n, vlong off)
+{
+	long rv;
+	char *buf;
+	Aoedev *d;
+
+	d = unit2dev(UNIT(c->qid));
+	switch(TYPE(c->qid)){
+	default:
+		error(Ebadarg);
+	case Qctl:
+		return unitctlwrite(d, db, n);
+	case Qident:
+		error(Eperm);
+	case Qdata:
+		return rw(d, 1, db, n, off);
+	case Qconfig:
+		if(off + n > sizeof d->config)
+			error(Etoobig);
+		/* splice the written bytes into a copy of the current string */
+		buf = malloc(sizeof d->config);
+		if(buf == nil)
+			error(Enomem);
+		if(waserror()){
+			/* configwrite raises on failure; don't leak the copy */
+			free(buf);
+			nexterror();
+		}
+		memmove(buf, d->config, d->nconfig);
+		memmove(buf + off, db, n);
+		rv = configwrite(d, buf, n + off);
+		poperror();
+		free(buf);
+		return rv;
+	}
+}
+
+/*
+ * claim the first unused netlink slot for an interface and mark it up.
+ * raises an error when every slot is taken.
+ */
+static Netlink*
+addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
+{
+	Netlink *nl, *e;
+
+	lock(&netlinks);
+	if(waserror()){
+		unlock(&netlinks);
+		nexterror();
+	}
+	nl = netlinks.nl;
+	e = nl + nelem(netlinks.nl);
+	for(; nl < e; nl++){
+		if(nl->cc)
+			continue;
+		nl->cc = cc;
+		nl->dc = dc;
+		nl->mtu = mtu;
+		/* strncpy does not terminate on truncation; path is printed with %s */
+		strncpy(nl->path, path, sizeof nl->path - 1);
+		nl->path[sizeof nl->path - 1] = 0;
+		memmove(nl->ea, ea, sizeof nl->ea);
+		poperror();
+		nl->flag |= Dup;
+		unlock(&netlinks);
+		return nl;
+	}
+	error("out of netlink structures");
+	return nil;
+}
+
+/* allocate the next unit number; returns the count before the increment */
+static int
+newunit(void)
+{
+	int x;
+
+	lock(&units);
+	x = units.ref++;
+	unlock(&units);
+	return x;
+}
+
+/* give back a unit number; returns the count after the decrement */
+static int
+dropunit(void)
+{
+	int x;
+
+	lock(&units);
+	x = --units.ref;
+	unlock(&units);
+	return x;
+}
+
+/*
+ * allocate and initialise a device with n frames, all free.
+ * raises an error if memory cannot be had.
+ */
+static Aoedev*
+newdev(long major, long minor, int n)
+{
+	Aoedev *d;
+	Frame *f, *e;
+
+	d = mallocz(sizeof *d, 1);
+	f = mallocz(sizeof *f*n, 1);
+	if (!d || !f) {
+		free(d);
+		free(f);
+		error("aoe device allocation failure");
+	}
+	d->nframes = n;
+	d->frames = f;
+	for (e = f + n; f < e; f++)
+		f->tag = Tfree;
+	d->maxout = n;
+	d->major = major;
+	d->minor = minor;
+	d->maxbcnt = Dbcnt;
+	d->flag = Djumbo;
+	d->unit = newunit();	/* bzzt.  inaccurate if units removed */
+	d->dl = d->dltab;
+	return d;
+}
+
+/*
+ * look up a known device by shelf.slot.
+ * raises an error (never returns nil) when it is not in the list.
+ */
+static Aoedev*
+mm2dev(long major, long minor)
+{
+	Aoedev *d;
+
+	rlock(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor){
+			runlock(&devs);
+			return d;
+		}
+	runlock(&devs);
+	uprint("mm2dev: device %ld.%ld not found", major, minor);
+	error(up->genbuf);
+	return nil;
+}
+
+/*
+ * Find the device in our list.  If not known, add it.
+ * n is the frame count to give a newly created device.
+ */
+static Aoedev*
+getdev(long major, long minor, int n)
+{
+	Aoedev *d;
+
+	wlock(&devs);
+	if(waserror()){
+		wunlock(&devs);
+		nexterror();
+	}
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor)
+			break;
+	/*
+	 * only create when the search came up empty.  testing for a
+	 * non-nil d here would duplicate every known device and hand
+	 * nil back for every new one.
+	 */
+	if(d == nil){
+		d = newdev(major, minor, n);
+		d->next = devs.d;
+		devs.d = d;
+	}
+	poperror();
+	wunlock(&devs);
+	return d;
+}
+
+/* little-endian 16-bit load */
+static ushort
+gbit16(void *a)
+{
+	uchar *i;
+
+	i = a;
+	return i[1] << 8 | i[0];
+}
+
+/* little-endian 32-bit load */
+static u32int
+gbit32(void *a)
+{
+	u32int j;
+	uchar *i;
+
+	i = a;
+	/* widen before shifting so the top byte never lands in an int's sign bit */
+	j  = (u32int)i[3] << 24;
+	j |= i[2] << 16;
+	j |= i[1] << 8;
+	j |= i[0];
+	return j;
+}
+
+/* little-endian 64-bit load */
+static uvlong
+gbit64(void *a)
+{
+	uchar *i;
+
+	i = a;
+	return (uvlong)gbit32(i+4) << 32 | gbit32(a);
+}
+
+static void
+ataident(Aoedev *d)
+{
+	Aoeata *a;
+	Block *b;
+	Frame *f;
+
+	f = freeframe(d);
+	if(f == nil)
+		return;
+	f->nhdr = sizeof *a;
+	memset(f->hdr, 0, f->nhdr);
+	a = (Aoeata*)f->hdr;
+	if(hset(d, f, a, ACata) == -1)
+		return;
+	a->cmdstat = Cid;	/* ata 6, page 
110 */ + a->scnt = 1; + a->lba[3] = 0xa0; + d->nout++; + f->dl->npkt++; + b = allocfb(f, 0); + devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0); +} + +static int +getmtu(Chan *m) +{ + int n, mtu; + char buf[36]; + + mtu = 8192; + if(waserror()) + return mtu; + n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0); + if(n > 12){ + buf[n] = 0; + mtu = strtoul(buf + 12, 0, 0); + } + poperror(); + return mtu; +} + +static int +newdlea(Devlink *l, uchar *ea) +{ + int i; + uchar *t; + + for(i = 0; i < Nea; i++){ + t = l->eatab[i]; + if(i == l->nea){ + memmove(t, ea, Eaddrlen); + return l->nea++; + } + if(memcmp(t, ea, Eaddrlen) == 0) + return i; + } + return -1; +} + +static Devlink* +newdevlink(Aoedev *d, Netlink *n, Aoeqc *c) +{ + int i; + Devlink *l; + + for(i = 0; i < Ndevlink; i++){ + l = d->dl + i; + if(i == d->ndl){ + d->ndl++; + newdlea(l, c->src); + l->nl = n; + l->flag |= Dup; + l->mintimer = Rtmin; + l->rttavg = Rtmax; + return l; + } + if(l->nl == n) + return l; + } + return 0; +} + +static void +errrsp(Block *b, char *s) +{ + int n; + Aoedev *d; + Aoehdr *h; + Frame *f; + + h = (Aoehdr*)b->rp; + n = nhgetl(h->tag); + if(n == Tmgmt || n == Tfree) + return; + d = mm2dev(nhgets(h->major), h->minor); + if(d == 0) + return; + if(f = getframe(d, n)) + frameerror(d, f, s); +} + +static void +qcfgrsp(Block *b, Netlink *nl) +{ + int major, cmd, cslen, blen; + unsigned n; + Aoedev *d; + Aoeqc *ch; + Devlink *l; + Frame *f; + + ch = (Aoeqc*)b->rp; + major = nhgets(ch->major); + n = nhgetl(ch->tag); + if(n != Tmgmt){ + d = mm2dev(major, ch->minor); + qlock(d); + f = getframe(d, n); + if(f == nil){ + qunlock(d); + eventlog("%æ: unknown response tag %ux\n", d, n); + return; + } + cslen = nhgets(ch->cslen); + blen = BLEN(b) - sizeof *ch; + if(cslen < blen) + eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n", + d, n, cslen, blen); + if(cslen > blen){ + eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n", + d, n, cslen, blen); + cslen = blen; + } + memmove(f->dp, ch + 1, cslen); + 
f->srb->nout--; + wakeup(f->srb); + d->nout--; + f->srb = nil; + f->tag = Tfree; /* mark the frame slot free */ + qunlock(d); + return; + } + /* Tmgmt tag: a discovery response; only config command 0 is accepted */ + cmd = ch->verccmd & 0xf; + if(cmd != 0){ + eventlog("cfgrsp: bad command %d", cmd); + return; + } + n = nhgets(ch->bufcnt); /* capped at Maxframes, then handed to getdev */ + if(n > Maxframes) + n = Maxframes; + d = getdev(major, ch->minor, n); + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + + l = newdevlink(d, nl, ch); /* add this interface. */ + + d->fwver = nhgets(ch->fwver); + n = nhgets(ch->cslen); /* config string, truncated to fit d->config */ + if(n > sizeof d->config) + n = sizeof d->config; + d->nconfig = n; + memmove(d->config, ch + 1, n); + if(l != 0 && d->flag & Djumbo){ /* size data frames from the interface mtu and the target's scnt */ + n = getmtu(nl->mtu) - sizeof(Aoeata); + n /= Aoesectsz; + if(n > ch->scnt) + n = ch->scnt; + n = n? n * Aoesectsz: Dbcnt; /* no whole sector fits: fall back to Dbcnt bytes */ + if(n != d->maxbcnt){ + eventlog("%æ: setting %d byte data frames on %s:%E\n", + d, n, nl->path, nl->ea); + d->maxbcnt = n; + } + } + if(d->nopen == 0) /* identify the drive only while nobody has it open */ + ataident(d); + poperror(); + qunlock(d); +} +/* + * unpack n bytes of identify words into p, high byte of each word + * first, NUL-terminate, strip trailing blanks and shift the text + * left over any leading blanks. p needs room for n+1 bytes. + */ +static void +idmove(char *p, ushort *a, unsigned n) +{ + int i; + char *op, *e; + + op = p; + for(i = 0; i < n / 2; i++){ + *p++ = a[i] >> 8; + *p++ = a[i]; + } + *p = 0; + while(p > op && *--p == ' ') + *p = 0; + e = p; + p = op; + while(*p == ' ') + p++; + memmove(op, p, n - (e - p)); /* NOTE(review): length is not simply the trimmed size; confirm intent */ +} +/* + * digest an identify block: recompute the Dllba, Dpower, Dsmart and + * Dnop flags from words 82, 83 and 86, mark the device up and keep a + * copy of the block in d->ident. returns the sector count: words + * 100.. when bit 10 of word 83|86 is set, else words 60... + */ +static vlong +aoeidentify(Aoedev *d, ushort *id) +{ + int i; + vlong s; + + d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup); + + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + d->flag |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + d->flag |= Dpower; + i = gbit16(id+82); + if(i & 1) + d->flag |= Dsmart; + if(i & (1<<14)) + d->flag |= Dnop; + } +// eventlog("%æ up\n", d); + d->flag |= Dup; + memmove(d->ident, id, sizeof d->ident); + return s; +} +/* + * process identify data for d: update flags, serial, firmware and + * model strings, and the size and version when the media changed. + */ +static int +identify(Aoedev *d, ushort *id) +{ + vlong osectors, s; + uchar oserial[21]; + + s = aoeidentify(d, id); + if(s == -1) /* NOTE(review): aoeidentify has no explicit -1 return */ + return -1; + osectors = d->realbsize; /* previous size and serial, for change detection */ + memmove(oserial, d->serial, sizeof d->serial); + + 
idmove(d->serial, id+10, 20); + idmove(d->firmware, id+23, 8); + idmove(d->model, id+27, 40); + + s *= Aoesectsz; + if((osectors == 0 || osectors != s) && + memcmp(oserial, d->serial, sizeof oserial) != 0){ + d->bsize = s; + d->realbsize = s; +// d->mediachange = 1; + d->vers++; + } + return 0; +} + +static void +atarsp(Block *b) +{ + unsigned n; + short major; + Aoeata *ahin, *ahout; + Aoedev *d; + Frame *f; + Srb *srb; + + ahin = (Aoeata*)b->rp; + major = nhgets(ahin->major); + d = mm2dev(major, ahin->minor); + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + n = nhgetl(ahin->tag); + f = getframe(d, n); + if(f == nil){ + dprint("%æ: unexpected response; tag %ux\n", d, n); + goto bail; + } + rtupdate(f->dl, tsince(f->tag)); + ahout = (Aoeata*)f->hdr; + srb = f->srb; + + if(ahin->cmdstat & 0xa9){ + eventlog("%æ: ata error cmd %.2ux stat %.2ux\n", + d, ahout->cmdstat, ahin->cmdstat); + if(srb) + srb->error = Eio; + } else { + n = ahout->scnt * Aoesectsz; + switch(ahout->cmdstat){ + case Crd: + case Crdext: + if(BLEN(b) - sizeof *ahin < n){ + eventlog("%æ: runt read blen %ld expect %d\n", + d, BLEN(b), n); + goto bail; + } + memmove(f->dp, ahin+1, n); + case Cwr: + case Cwrext: + if(n > Dbcnt) + f->nl->lostjumbo = 0; + if(f->bcnt -= n){ + f->lba += n / Aoesectsz; + f->dp = (uchar*)f->dp + n; + resend(d, f); + goto bail; + } + break; + case Cid: + if(BLEN(b) - sizeof *ahin < 512){ + eventlog("%æ: runt identify blen %ld expect %d\n", + d, BLEN(b), n); + goto bail; + } + identify(d, (ushort*)(ahin + 1)); + break; + default: + eventlog("%æ: unknown ata command %.2ux \n", + d, ahout->cmdstat); + } + } + + if(srb && --srb->nout == 0 && srb->len == 0) + wakeup(srb); + f->srb = nil; + f->tag = Tfree; + d->nout--; + + work(d); +bail: + poperror(); + qunlock(d); +} + +#define ERROR(x) do{ uprint("%s: %s", name, x); error(up->genbuf); }while(0) + +static void +netrdaoe(void *v) +{ + int idx; + char name[Maxpath+1], *s; + Aoehdr *h; + Block *b; + Netlink *nl; + + 
nl = (Netlink*)v; + idx = nl - netlinks.nl; + netlinks.reader[idx] = 1; + kstrcpy(name, nl->path, Maxpath); + + if(waserror()){ + eventlog("netrdaoe exiting: %s\n", up->errstr); + netlinks.reader[idx] = 0; + wakeup(netlinks.rendez + idx); + pexit(up->errstr, 1); + } + if(autodiscover) + discover(0xffff, 0xff); + for (;;) { + if((nl->flag&Dup) == 0) + ERROR("netlink is down"); + b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0); + if(b == nil) + ERROR("nil read from network"); + h = (Aoehdr*)b->rp; + if(h->verflag & AFrsp) + if(s = aoeerror(h)){ + eventlog("%s: %s\n", nl->path, up->errstr); + errrsp(b, s); + }else + switch(h->cmd){ + case ACata: + atarsp(b); + break; + case ACconfig: + qcfgrsp(b, nl); + break; + default: + eventlog("%s: unknown cmd %d\n", + nl->path, h->cmd); + errrsp(b, "unknown command"); + } + freeb(b); + } +} + +static void +getaddr(char *path, uchar *ea) +{ + int n; + char buf[2*Eaddrlen+1]; + Chan *c; + + uprint("%s/addr", path); + c = namec(up->genbuf, Aopen, OREAD, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + n = devtab[c->type]->read(c, buf, sizeof buf-1, 0); + poperror(); + cclose(c); + buf[n] = 0; + if(parseether(ea, buf) < 0) + error("parseether failure"); +} + +static void +netbind(char *path) +{ + char addr[Maxpath]; + uchar ea[2*Eaddrlen+1]; + Chan *dc, *cc, *mtu; + Netlink *nl; + + snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype); + dc = chandial(addr, nil, nil, &cc); + snprint(addr, sizeof addr, "%s/mtu", path); + mtu = namec(addr, Aopen, OREAD, 0); + + if(waserror()){ + cclose(dc); + cclose(cc); + if(mtu) + cclose(mtu); + nexterror(); + } + if(dc == nil || cc == nil) + error(Enonexist); + getaddr(path, ea); + nl = addnet(path, cc, dc, mtu, ea); + snprint(addr, sizeof addr, "netrdaoe@%s", path); + kproc(addr, netrdaoe, nl); + poperror(); +} + +static int +unbound(void *v) +{ + return *(int*)v != 0; +} + +static void +netunbind(char *path) +{ + int i, idx; + Aoedev *d, *p, *next; + Chan *dc, *cc; + Devlink *l; + Frame 
*f; + Netlink *n, *e; + + n = netlinks.nl; + e = n + nelem(netlinks.nl); + + lock(&netlinks); /* find the bound netlink whose path matches */ + for(; n < e; n++) + if(n->dc && strcmp(n->path, path) == 0) + break; + unlock(&netlinks); + if (n >= e) + error("device not bound"); + + /* + * hunt down the devlinks using this interface and mark them down. + * clearing Dup on the netlink itself also terminates the reader. + */ + idx = n - netlinks.nl; + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + l->flag &= ~Dup; + } + qunlock(d); + } + n->flag &= ~Dup; + wunlock(&devs); + + /* confirm reader is down. NOTE(review): unbound() is true while the reader flag is nonzero; confirm polarity. */ + while(waserror()) /* errors raised by sleep are ignored and the wait is retried */ + ; + sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx); + poperror(); + + /* reschedule packets that were outstanding on this netlink. */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->nframes; i++){ + f = d->frames + i; + if(f->tag != Tfree && f->nl == n) + resend(d, f); + } + qunlock(d); + } + wunlock(&devs); + + /* squeeze devlink pool. (we assert nobody is using them now) */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + memmove(l, l + 1, sizeof *l * (--d->ndl - i)); /* close the gap; ndl shrinks by one */ + } + qunlock(d); + } + wunlock(&devs); + + /* close device link. 
*/ + lock(&netlinks); + dc = n->dc; + cc = n->cc; + if(n->mtu) + cclose(n->mtu); + memset(n, 0, sizeof *n); + unlock(&netlinks); + + cclose(dc); + cclose(cc); + + /* squeeze orphan devices */ + wlock(&devs); + for(p = d = devs.d; d; p = d, d = next){ + next = d->next; + if(d->ndl > 0) + continue; + if(p != devs.d) + p->next = next; + else + devs.d = next; + free(d->frames); + free(d); + dropunit(); + } + wunlock(&devs); +} + +static void +removedev(char *name) +{ + int i; + Aoedev *d, *p; + + wlock(&devs); + for(p = d = devs.d; d; p = d, d = d->next) + if(strcmp(name, unitname(d)) == 0) + goto found; + wunlock(&devs); + error("device not bound"); +found: + d->flag &= ~Dup; + d->vers++; + d->ndl = 0; + + for(i = 0; i < d->nframes; i++) + frameerror(d, d->frames+i, Enotup); + + if(p != devs.d) + p->next = d->next; + else + devs.d = d->next; + free(d->frames); + free(d); + dropunit(); + wunlock(&devs); +} + +static void +discoverstr(char *f) +{ + ushort shelf, slot; + char *s; + + if(f == 0){ + discover(0xffff, 0xff); + return; + } + + shelf = strtol(f, &s, 0); + if(s == f || shelf > 0xffff) + error("bad shelf"); + f = s; + if(*f++ == '.'){ + slot = strtol(f, &s, 0); + if(s == f || slot > 0xff) + error("bad shelf"); + }else + slot = 0xff; + discover(shelf, slot); +} + + +static long +topctlwrite(void *db, long n) +{ + char *f; + enum { + Autodiscover, + Bind, + Debug, + Discover, + Closewait, + Rediscover, + Remove, + Unbind, + }; + Cmdbuf *cb; + Cmdtab *ct, cmds[] = { + { Autodiscover, "autodiscover", 0 }, + { Bind, "bind", 2 }, + { Debug, "debug", 0 }, + { Discover, "discover", 0 }, + { Rediscover, "rediscover", 0 }, + { Remove, "remove", 2 }, + { Unbind, "unbind", 2 }, + }; + + cb = parsecmd(db, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmds, nelem(cmds)); + f = cb->f[1]; + switch(ct->index){ + case Autodiscover: + autodiscover = toggle(f, autodiscover); + break; + case Bind: + netbind(f); + break; + case Debug: + debug = toggle(f, 
debug); + break; + case Discover: + discoverstr(f); + break; + case Rediscover: + rediscover = toggle(f, rediscover); + break; + case Remove: + removedev(f); + break; + case Unbind: + netunbind(f); + break; + default: + cmderror(cb, "unknown aoe control message"); + } + poperror(); + free(cb); + return n; +} +/* write entry point: directories and the log get Eperm, the top-level ctl goes to topctlwrite and the per-unit files to unitwrite */ +static long +aoewrite(Chan *c, void *db, long n, vlong off) +{ + switch(TYPE(c->qid)){ + default: + case Qzero: + case Qtopdir: + case Qunitdir: + case Qtoplog: + error(Eperm); + case Qtopctl: + return topctlwrite(db, n); + case Qctl: + case Qdata: + case Qconfig: + case Qident: + return unitwrite(c, db, n, off); + } +} +/* device table entry for the aoe device, #æ */ +Dev aoedevtab = { + L'æ', + "aoe", + + devreset, + devinit, + devshutdown, + aoeattach, + aoewalk, + aoestat, + aoeopen, + devcreate, + aoeclose, + aoeread, + devbread, + aoewrite, + devbwrite, + devremove, + devwstat, + devpower, + devconfig, +}; --- /sys/src/9/port/sdaoe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/port/sdaoe.c Thu Aug 9 21:43:56 2007 @@ -0,0 +1,650 @@ +/* + * aoe sd driver, copyright © 2007 coraid + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" + +extern char Echange[]; +extern char Enotup[]; +/* format into the per-process scratch buffer up->genbuf */ +#define uprint(...) 
snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); +/* Maxpath bounds Ctlr.path */ +enum { + Nctlr = 32, + Maxpath = 128, +}; +/* feature bits kept in Ctlr.feat */ +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, +}; +/* names of the feature bits above, indexed by bit number */ +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", +}; +/* per-target controller state; the QLock is taken by aoeconnect */ +typedef struct Ctlr Ctlr; +struct Ctlr{ + QLock; + + Ctlr *next; + SDunit *unit; + + char path[Maxpath]; /* path of the aoe target directory */ + Chan *c; /* open data file of the target, set by aoeconnect */ + + ulong vers; + uchar mediachange; + uchar flag; + uchar smart; + uchar smartrs; + uchar feat; + + uvlong sectors; + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + char ident[0x100]; /* identify data as read from the target's ident file */ +}; +/* list of all controllers, guarded by ctlrlock */ +static Lock ctlrlock; +static Ctlr *head; +static Ctlr *tail; + +SDifc sdaoeifc; +/* + * unpack n bytes of identify words into p, high byte of each word + * first, NUL-terminate, strip trailing blanks and shift the text + * left over any leading blanks. p needs room for n+1 bytes. + */ +static void +idmove(char *p, ushort *a, int n) +{ + int i; + char *op, *e; + + op = p; + for(i = 0; i < n/2; i++){ + *p++ = a[i] >> 8; + *p++ = a[i]; + } + *p = 0; + while(p > op && *--p == ' ') + *p = 0; + e = p; + p = op; + while(*p == ' ') + p++; + memmove(op, p, n - (e - p)); /* NOTE(review): length is not simply the trimmed size; confirm intent */ +} +/* fetch a 16-bit value stored low byte first */ +static ushort +gbit16(void *a) +{ + uchar *i; + + i = a; + return i[1] << 8 | i[0]; +} +/* fetch a 32-bit value stored low byte first */ +static u32int +gbit32(void *a) +{ + u32int j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} +/* fetch a 64-bit value stored low byte first, as two 32-bit halves */ +static uvlong +gbit64(void *a) +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4)<<32 | gbit32(i); +} +/* + * digest identify data for c: recompute the Dllba, Dpower, Dsmart + * and Dnop feature bits from words 82, 83 and 86, take the sector + * count from words 100.. (bit 10 of word 83|86 set) or words 60.., + * and refresh the serial, firmware and model strings. + */ +static int +identify(Ctlr *c, ushort *id) +{ + int i; + uchar oserial[21]; + uvlong osectors, s; + + osectors = c->sectors; /* previous size and serial, for change detection */ + memmove(oserial, c->serial, sizeof c->serial); + + c->feat &= ~(Dllba|Dpower|Dsmart|Dnop); + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + c->feat |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + c->feat |= Dpower; + i = gbit16(id+82); + if(i & 1) + c->feat |= Dsmart; + if(i & (1<<14)) + c->feat |= Dnop; + } + + idmove(c->serial, id+10, 20); + idmove(c->firmware, id+23, 8); + idmove(c->model, 
id+27, 40); + + if((osectors == 0 || osectors != s) && + memcmp(oserial, c->serial, sizeof oserial) != 0){ + c->sectors = s; + c->mediachange = 1; + c->vers++; + } + return 0; +} + +/* must call with d qlocked */ +static int +aoeidentify(Ctlr *d, SDunit *u) +{ + Chan *c; + + c = nil; + if(waserror()){ + if(c) + cclose(c); + iprint("aoeidentify: %s\n", up->errstr); + qunlock(d); + return -1; + } + + uprint("%s/ident", d->path); + c = namec(up->genbuf, Aopen, OREAD, 0); + devtab[c->type]->read(c, d->ident, sizeof d->ident, 0); + + poperror(); + cclose(c); + + d->feat = 0; + d->smart = 0; + identify(d, (ushort*)d->ident); + + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, d->model, 40); + + return 0; +} + +static Ctlr* +ctlrlookup(char *path) +{ + Ctlr *c; + + lock(&ctlrlock); + for(c = head; c; c = c->next) + if(strcmp(c->path, path) == 0) + break; + unlock(&ctlrlock); + return c; +} + +static Ctlr* +newctlr(char *path) +{ + Ctlr *c; + + /* race? 
*/ + if(ctlrlookup(path)) + error(Eexist); + + if((c = malloc(sizeof *c)) == nil) + return 0; + kstrcpy(c->path, path, sizeof c->path); + lock(&ctlrlock); + if(head != nil) + tail->next = c; + else + head = c; + tail = c; + unlock(&ctlrlock); + return c; +} + +static void +delctlr(Ctlr *c) +{ + Ctlr *x, *prev; + + lock(&ctlrlock); + + for(prev = 0, x = head; x; prev = x, x = c->next) + if(strcmp(c->path, x->path) == 0) + break; + if(x == 0){ + unlock(&ctlrlock); + error(Enonexist); + } + + if(prev) + prev->next = x->next; + else + head = x->next; + if(x->next == nil) + tail = prev; + unlock(&ctlrlock); + + if(x->c) + cclose(x->c); + free(x); +} + +static SDev* +aoeprobe(char *path, SDev *s) +{ + int n, i; + char *p; + Chan *c; + Ctlr *ctlr; + + if((p = strrchr(path, '/')) == 0) + error(Ebadarg); + *p = 0; + uprint("%s/ctl", path); + *p = '/'; + + c = namec(up->genbuf, Aopen, OWRITE, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + n = uprint("discover %s", p+1); + devtab[c->type]->write(c, up->genbuf, n, 0); + poperror(); + cclose(c); + + for(i = 0;; i += 200){ + if(i > 8000 || waserror()) + error(Etimedout); + tsleep(&up->sleep, return0, 0, 200); + poperror(); + + uprint("%s/ident", path); + if(waserror()) + continue; + c = namec(up->genbuf, Aopen, OREAD, 0); + poperror(); + cclose(c); + + ctlr = newctlr(path); + break; + } + + if(s == nil && (s = malloc(sizeof *s)) == nil) + return nil; + s->ctlr = ctlr; + s->ifc = &sdaoeifc; + s->nunit = 1; + return s; +} + +static char *probef[32]; +static int nprobe; + +static int +pnpprobeid(char *s) +{ + int id; + + if(strlen(s) < 2) + return 0; + id = 'e'; + if(s[1] == '!') + id = s[0]; + return id; +} + +static SDev* +aoepnp(void) +{ + int i, id; + char *p; + SDev *h, *t, *s; + + if((p = getconf("aoedev")) == 0) + return 0; + nprobe = tokenize(p, probef, nelem(probef)); + h = t = 0; + for(i = 0; i < nprobe; i++){ + id = pnpprobeid(probef[i]); + if(id == 0) + continue; + s = malloc(sizeof *s); + if(s == nil) + 
break; + s->ctlr = 0; + s->idno = id; + s->ifc = &sdaoeifc; + s->nunit = 1; + + if(h) + t->next = s; + else + h = s; + t = s; + } + return h; +} + +static Ctlr* +pnpprobe(SDev *sd) +{ + int j; + char *p; + static int i; + + if(i > nprobe) + return 0; + p = probef[i++]; + if(strlen(p) < 2) + return 0; + if(p[1] == '!') + p += 2; + + for(j = 0;; j += 200){ + if(j > 8000){ + print("pnpprobe: %s: %s\n", probef[i-1], up->errstr); + return 0; + } + if(waserror()){ + tsleep(&up->sleep, return0, 0, 200); + continue; + } + sd = aoeprobe(p, sd); + poperror(); + break; + } + print("æ: pnpprobe establishes %sin %dms\n", probef[i-1], j); + return sd->ctlr; +} + + +static int +aoeverify(SDunit *u) +{ + SDev *s; + Ctlr *c; + + s = u->dev; + c = s->ctlr; + if(c == nil && (s->ctlr = c = pnpprobe(s)) == nil) + return 0; + c->mediachange = 1; + return 1; +} + +static int +aoeconnect(SDunit *u, Ctlr *c) +{ + qlock(c); + if(waserror()){ + qunlock(c); + return -1; + } + + aoeidentify(u->dev->ctlr, u); + if(c->c) + cclose(c->c); + c->c = 0; + uprint("%s/data", c->path); + c->c = namec(up->genbuf, Aopen, ORDWR, 0); + qunlock(c); + poperror(); + + return 0; +} + +static int +aoeonline(SDunit *u) +{ + Ctlr *c; + int r; + + c = u->dev->ctlr; + r = 0; + + if((c->feat&Datapi) && c->mediachange){ + if(aoeconnect(u, c) == 0 && (r = scsionline(u)) > 0) + c->mediachange = 0; + return r; + } + + if(c->mediachange){ + if(aoeconnect(u, c) == -1) + return 0; + r = 2; + c->mediachange = 0; + u->sectors = c->sectors; + u->secsize = Aoesectsz; + } else if(devtab[c->c->type]->read(c->c, 0, 0, 0) == 0) + r = 1; + + return r; +} + +static int +aoerio(SDreq *r) +{ + int i, count; + uvlong lba; + char *name; + uchar *cmd; + long (*rio)(Chan*, void*, long, vlong); + Ctlr *c; + SDunit *unit; + + unit = r->unit; + c = unit->dev->ctlr; + if(0 && c->feat & Datapi) + return aoeriopkt(r, d); + + cmd = r->cmd; + name = unit->name; + + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ +// qlock(c); +// i = flushcache(); +// 
qunlock(c); +// if(i == 0) +// return sdsetsense(r, SDok, 0, 0, 0); + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + + if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){ + r->status = i; + return i; + } + + switch(*cmd){ + case 0x88: + case 0x28: + rio = devtab[c->c->type]->read; + break; + case 0x8a: + case 0x2a: + rio = devtab[c->c->type]->write; + break; + default: + print("%s: bad cmd 0x%.2ux\n", name, cmd[0]); + r->status = SDcheck; + return SDcheck; + } + + if(r->data == nil) + return SDok; + + if(r->clen == 16){ + if(cmd[2] || cmd[3]) + return sdsetsense(r, SDcheck, 3, 0xc, 2); + lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32; + lba |= cmd[6]<<24 | cmd[7]<<16 | cmd[8]<<8 | cmd[9]; + count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13]; + }else{ + lba = cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5]; + count = cmd[7]<<8 | cmd[8]; + } + + count *= Aoesectsz; + + if(r->dlen < count) + count = r->dlen & ~0x1ff; + + if(waserror()){ + if(strcmp(up->errstr, Echange) == 0 || + strcmp(up->errstr, Enotup) == 0) + unit->sectors = 0; + nexterror(); + } + r->rlen = rio(c->c, r->data, count, Aoesectsz * lba); + poperror(); + r->status = SDok; + return SDok; +} + +static char *smarttab[] = { + "unset", + "error", + "threshold exceeded", + "normal" +}; + +static char * +pflag(char *s, char *e, uchar f) +{ + uchar i, m; + + for(i = 0; i < 8; i++){ + m = 1 << i; + if(f & m) + s = seprint(s, e, "%s ", flagname[i]); + } + return seprint(s, e, "\n"); +} + +static int +aoerctl(SDunit *u, char *p, int l) +{ + Ctlr *c; + char *e, *op; + + if((c = u->dev->ctlr) == nil) + return 0; + e = p+l; + op = p; + + p = seprint(p, e, "model\t%s\n", c->model); + p = seprint(p, e, "serial\t%s\n", c->serial); + p = seprint(p, e, "firm %s\n", c->firmware); + if(c->smartrs == 0xff) + p = seprint(p, e, "smart\tenable error\n"); + else if(c->smartrs == 0) + p = seprint(p, e, "smart\tdisabled\n"); + else + p = seprint(p, e, "smart\t%s\n", smarttab[c->smart]); + p = seprint(p, e, "flag 
"); + p = pflag(p, e, c->feat); + p = seprint(p, e, "geometry %llud %s\n", c->sectors, Aoesectsz); + return p-op; +} + +static int +aoewctl(SDunit *, Cmdbuf *cmd) +{ + cmderror(cmd, Ebadarg); + return 0; +} + +static SDev* +aoeprobew(DevConf *c) +{ + char *p; + + p = strchr(c->type, '/'); + if(p == nil || strlen(p) > Maxpath - 11) + error(Ebadarg); + if(p[1] == '#') + p++; /* hack */ + if(ctlrlookup(p)) + error(Einuse); + return aoeprobe(p, 0); +} + +static void +aoeclear(SDev *s) +{ + delctlr((Ctlr *)s->ctlr); +} + +static char* +aoertopctl(SDev *s, char *p, char *e) +{ + Ctlr *c; + + c = s->ctlr; + return seprint(p, e, "%s aoe %s\n", s->name, c->path); +} + +static int +aoewtopctl(SDev *, Cmdbuf *cmd) +{ + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + } + return 0; +} + +SDifc sdaoeifc = { + "aoe", + + aoepnp, + nil, /* legacy */ + nil, /* enable */ + nil, /* disable */ + + aoeverify, + aoeonline, + aoerio, + aoerctl, + aoewctl, + + scsibio, + aoeprobew, /* probe */ + aoeclear, /* clear */ + aoertopctl, + aoewtopctl, +}; --- /sys/man/3/aoe Thu Jan 1 00:00:00 1970 +++ /sys/man/3/aoe Wed Aug 8 23:59:52 2007 @@ -0,0 +1,252 @@ +.TH AOE 3 +.SH NAME +aoe \- AoE interface +.SH SYNOPSIS +.nf +.B bind -a #æ /dev + +.B /dev/aoe/ctl +.B /dev/aoe/log +.BI /dev/aoe/ n.m /config +.BI /dev/aoe/ n.m /ctl +.BI /dev/aoe/ n.m /devlink/ \fI0\fR +\&... +.BI /dev/aoe/ n.m /devlink/ \fIi\fR +.BI /dev/aoe/ n.m /ident +\&... +.fi +.SH DESCRIPTION +.PP +The AoE (ATA-over-Ethernet) interface serves a three-level +directory providing control and access to AoE targets. +The interface provided is primarily intended for low-level +control of the AoE initiator. See +.IR sdaoe (3) +for the standard interface. +.PP +In order to access AoE targets, one or more Ethernet controllers +need to be bound to the AoE initiator. By default, the system +starts with no interfaces bound. 
For automatic binding of interfaces +on boot, the +.B aoeif +configuration variable is used in +.IR plan9.ini (8). +Ethernet interfaces are specified as +.BI ether n +not as +.BI #l n \fR.\fI +To bind the first and second Ethernet devices on boot, add +.IP +.EX +aoeif=ether0 ether1 +.EE +.PP +To bind ether1 to a running system: +.IP +.EX +% echo bind '#l1/ether1' > /dev/aoe/ctl +.EE +.PP +And to unbind it: +.IP +.EX +% echo unbind '#l1/ether1' > /dev/aoe/ctl +.EE +.PP +When an interface is unbound, targets depending +on that interface are removed. +.PP +Each local interface is called a netlink. The mapping of +AoE targets to netlinks is called a devlink. Each devlink may +see multiple interfaces per target. For example, if the local +machine has one Ethernet address bound and the target has +two interfaces on the same Ethernet segment, this will result +in one netlink and one devlink with two Ethernet addresses. +AoE frames are sent in round-robin fashion. Each successive +frame is sent on the next address available on the next available +devlink (local interface). +.PP +Normally the initiator automatically discovers and adds new +device directories on startup. New devices are not added +except as new interfaces are bound to the initiator. There +are several messages that can be sent to the +.B ctl +file which alter this behavior: +.TP +.BI autodiscover\ toggle +If toggle is +.IR nil , +the state of +.B autodiscover +is toggled. If it is the string +.BR on , +it is turned on. Any other string turns +.B autodiscover +off. This option is not useful after Ethernet devices have been bound. +.TP +.BI discover\ shelf.slot +Attempt to find the named target on all bound interfaces. +.TP +.BI remove\ shelf.slot +The converse of +.BR discover . +Remove the named target if it exists. +.TP +.BI rediscover\ toggle +Allow or disallow rediscovery. This allows for automatic discovery +of new targets. Unfortunately, it also allows automatic modification +or loss of existing targets. 
This option is considered dangerous. +.PP +Reading the ctl file returns a list of colon-separated lines: +.TP +.B debug +.TP +.B autodiscover +.TP +.B rediscover +Returns the current state of the named variable. Writing the variable's +name to the control file toggles the state of that variable. +.TP +.BI if n\ \fLpath +Path to \fInth\fR bound Ethernet device. +.TP +.BI if n\ \fLea +Ethernet address of this device. +.TP +.BI if n\ \fLflag +``Up'' indicates this interface is available. +.TP +.BI if n\ \fLlostjumbo +Number of consecutive lost jumbograms. +.TP +.BI if n\ \fLdatamtu +Incorrect and unused. +.PP +Once configured, each AoE target is accessed via files in the directory named +for its shelf and slot. For example, shelf 42, slot 0 would be +accessed through the path +.LR /dev/aoe/42.0 . +The +.B ident +file contains the read-only, verbatim result of the identify unit ATA command. +The +.B config +file contains the target's AoE configuration string. Writing to this file +sets the target's configuration string. +.PP +Reading the +.B ctl +file returns a list of colon-separated lines: +.TP +.B state +``Up'' or ``down''. +.TP +.B nopen +Number of clients using this target. +.TP +.B nout +Number of outstanding AoE frames. +.TP +.B nmaxout +Maximum number of outstanding frames allowed. +.TP +.B nframes +Maximum number of outstanding frames. +.B Nframes +is greater than +.B nmaxout +when the initiator is reducing the number of in-flight +frames due to packet loss. It is assumed that packet +loss is due to an overwhelmed target and not poor +network conditions. +.TP +.BI maxbcount +Maximum number of data bytes per AoE frame. Using +standard frames, +.B maxbcount +is 1024 or two sectors. AoE ATA headers are 36 bytes. +.TP +.B model +.TP +.B serial +.TP +.B firmware +The respective fields from the ATA +.B identify unit +command. +.TP +.B flag +List of flags useful for debugging. The flag +.B jumbo +indicates that jumbo frames are accepted, not that +they are being used. 
+.B Maxbcount +should be consulted for this purpose. +.PP +The +.B data +file may be read or written like a normal file. Reads and +writes to this file are sent as AoE commands to the target. +The size of this file is the usable size of the target. +.PP +The +.B devlink +directory contains one file for each interface the target was +discovered on. The files are numbered from 0 to +.I n +and contain a list of colon-separated lines: +.TP +.B addr +A space-separated list of the target's Ethernet addresses visible from +this interface. +.TP +.B npkt +The number of frames sent on this interface. +.TP +.B resent +The number of frames resent. Frames are resent +when they have been outstanding twice the +RTT average. +.TP +.B flag +``Up'' when the netlink is up. +.TP +.B rttavg +.B mintimer +Minimum timer and RTT average as per +.IR "Congestion Avoidance and Control" . +.TP +.B nl path +Path of the Ethernet device. +.TP +.B nl ea +Ethernet address of the local Ethernet device. +.TP +.B nl flag +``Up'' if the local interface is up. +.TP +.B nl lostjumbo +Number of consecutive jumbograms lost. +.TP +.B nl datamtu +Unused. +.PP +.SH SOURCE +.B /sys/src/9/port/devaoe.c +.SH SEE ALSO +.IR cec (1), +.IR snoopy (8), +.IR sd (3), +.IR sdaoe (3), +.IR vblade (1), +.br +.BR http://www.coraid.com/documents/AoEr10.txt , +.br +Van Jacobson and Michael J. Karels, +.IR "``Congestion Avoidance and Control''" , +ACM Computer Communication Review; +Proceedings of the Sigcomm '88 Symposium in Stanford, CA, August, 1988. +.SH BUGS +There is no +.B raw +file for executing arbitrary commands. --- /sys/man/3/sdaoe Thu Jan 1 00:00:00 1970 +++ /sys/man/3/sdaoe Wed Aug 8 23:59:58 2007 @@ -0,0 +1,67 @@ +.TH SDAOE 3 +.SH NAME +sdaoe \- AoE-to-storage device interface quirks +.SH SYNOPSIS +.nf +.B bind #S /dev +.B "echo config switch on spec" \fIl\fP type aoe/aoe/\fIn.m\fP > /dev/sdctl + +.BI /dev/sd l 0/ctl +.BI /dev/sd l 0/raw +.BI /dev/sd l 0/data +\&... 
+.fi +.SH DESCRIPTION +.PP +The AoE-to-storage device interface has a few quirks due to the fact that +network-attached storage can't be enumerated like direct-attached storage. +The default first letter for AoE devices is +.LR e . +Each +.B sdaoe +device must be configured explicitly. To configure target +.B 42.0 +on +.BR sde0 : +.IP +.EX +echo config switch on spec e type aoe/aoe/42.0 > /dev/sdctl +.EE +.PP +To turn this device off, +.IP +.EX +echo config switch off spec e > /dev/sdctl +.EE +.PP +To boot from an AoE root, the +.B sd +device must be configured on boot. To accomplish this, +one must either PXE boot or boot from direct-attached +storage and add two configuration lines to +.IR plan9.ini (8). +For example, to boot using target +.B 42.0 +as +.B sde0 +as root over Ethernet interfaces 0 and 1, +.IP +.EX +aoeif=ether0 ether1 +aoedev=e!#æ/aoe/42.0 +.EE +.PP +.SH SOURCE +.B /sys/src/9/port/sdaoe.c +.SH SEE ALSO +.IR 9load (8), +.IR cec (1), +.IR snoopy (8), +.IR sd (3), +.IR aoe (3), +.IR vblade (1). +.SH BUGS +It is not currently possible to boot from an AoE target without an +external bootstrap like PXE. +.PP +Devsd is currently limited to LBA32, or 2TB per device.