from cinap

Reference: /n/atom/patch/applied/ethervirtio
Date: Sun Jan 3 02:23:50 CET 2016
Signed-off-by: quanstro@quanstro.net

--- /sys/src/nix/port/ethervirtio.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/nix/port/ethervirtio.c	Sun Jan 3 02:23:09 2016
@@ -0,0 +1,799 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "etherif.h"
+
+/*
+ * virtio ethernet driver
+ * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
+ *
+ * TODO
+ *
+ * remove paranoid checks
+ * remove 'cookie' in /^replenish
+ * implement control queue
+ */
+
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vheader Vheader;
+typedef struct Vqueue Vqueue;
+typedef struct Ctlr Ctlr;
+
+enum {
+	/* constants */
+	Virtpgsz	= 4096,
+
+	/* §2.1 Device Status Field */
+	Sacknowledge	= 1,
+	Sdriver		= 2,
+	Sdriverok	= 4,
+	Sfeatureok	= 8,
+	Sfailed		= 128,
+
+	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
+	Qdevfeat	= 0,
+	Qdrvfeat	= 4,
+	Qaddr		= 8,
+	Qsize		= 12,
+	Qselect		= 14,
+	Qnotify		= 16,
+	Qstatus		= 18,
+	Qisr		= 19,
+	Qmac		= 20,
+	Qnetstatus	= 26,
+
+	/* flags in Qnetstatus */
+	Nlinkup		= (1<<0),
+	Nannounce	= (1<<1),
+
+	/* feature bits */
+	Fmac		= (1<<5),
+	Fstatus		= (1<<16),
+	Fctrlvq		= (1<<17),
+
+	/* vring used flags */
+	Unonotify	= 1,
+	/* vring avail flags */
+	Rnointerrupt	= 1,
+
+	/* descriptor flags */
+	Dnext		= 1,
+	Dwrite		= 2,
+	Dindirect	= 4,
+
+	/* struct sizes */
+	VringSize	= 4,
+	VdescSize	= 16,
+	VusedSize	= 8,
+	VheaderSize	= 10,
+
+	/* etc */
+	Mtu		= 1522,
+
+	Vrxq		= 0,
+	Vtxq		= 1,
+	Vctlq		= 2,
+};
+
+struct Vring
+{
+	u16int	flags;
+	u16int	idx;
+};
+
+struct Vdesc
+{
+	u64int	addr;
+	u32int	len;
+	u16int	flags;
+	u16int	next;
+};
+
+struct Vused
+{
+	u32int	id;
+	u32int	len;
+};
+
+struct Vheader
+{
+	u8int	flags;
+	u8int	segtype;
+	u16int	hlen;
+	u16int	seglen;
+	u16int	csumstart;
+	u16int	csumend;
+};
+
+/* §2.4 Virtqueues */
+struct Vqueue
+{
+	Lock;
+
+	uint	nfree;
+	uint	qsize;
+	uint	qmask;
+
+	Vdesc	*desc;
+
+	Vring	*avail;
+	u16int	*availent;
+	u16int	*availevent;
+
+	Vring	*used;
+	Vused	*usedent;
+	u16int	*usedevent;
+	u16int	lastused;
+
+	void	**block;
+	Vheader	**header;
+};
+
+struct Ctlr {
+	Lock	alock;
+	QLock;
+	int	state;	/* attach */
+
+	int	port;
+	Pcidev*	pcidev;
+	Ctlr*	next;
+	int	active;
+
+	u32int	feat;
+
+	int	nqueue;
+
+	/* virtioether has 3 queues: rx, tx and ctl */
+	Vqueue	*queue[3];
+
+	Rendez	rrendez;
+	Rendez	trendez;
+
+	/* MAC address */
+	uchar	ea[Eaddrlen];
+};
+
+static Vheader empty;
+static Ctlr *ctlrhead;
+
+/* print a Vqueue */
+static void
+pq(Vqueue *q)
+{
+	uint m;
+
+	m = q->qmask;
+	iprint("vqueue size %d nfree %ud avail->idx %d used->idx %d lastused %hud",
+		q->qsize, q->nfree,
+		q->avail->idx & m, q->used->idx & m, q->lastused & m);
+}
+
+/*
+ * nonzero when the used ring holds entries we have not consumed yet,
+ * i.e. used->idx differs from lastused.
+ */
+static int
+hasroom(Vqueue *vq)
+{
+	coherence();
+	return vq->lastused != vq->used->idx;
+}
+
+/*
+ * put new buffers into the receive queue.
+ * returns without doing anything if the controller qlock is held.
+ */
+static void
+replenish(Ctlr *ctlr)
+{
+	ushort i, count, idx;
+	Block *b;
+	Vqueue *q;
+
+	if(!canqlock(ctlr))
+		return;
+
+	q = ctlr->queue[Vrxq];
+	count = q->qsize - (q->avail->idx - q->lastused & q->qmask); /* BOTCH */
+	for(i = 0; i < count; i++){
+		idx = (i + q->avail->idx) & q->qmask;
+
+		if(q->block[idx] != nil){
+			print("old block at %d busy\n", idx);
+			checkb(q->block[idx], "old rx block");
+			break;
+		}
+
+		/* descriptor contains Vheader immediately followed by the packet buffer */
+		b = allocb(VheaderSize + Mtu + 2);
+
+// ???		memset(b->rp, 0, VheaderSize + Mtu + 2);
+
+		/* tail cookie */
+		*((u16int*)b->lim - 3) = 0xF0; /* potentially misaligned */
+
+		q->desc[idx].flags = Dwrite;
+		q->desc[idx].addr = PCIWADDR(b->rp);
+		q->desc[idx].len = VheaderSize + Mtu;
+		q->block[idx] = b;
+
+		q->nfree++;
+	}
+
+	if(i){
+		coherence();
+		q->avail->idx += i;
+		outs(ctlr->port+Qnotify, Vrxq);
+	}
+
+	qunlock(ctlr);
+}
+
+/* hasroom with a void* argument, as passed to sleep and tsleep */
+static int
+vhasroom(void *v)
+{
+	Vqueue *vq;
+
+	vq = v;
+	return hasroom(vq);
+}
+
+/* free Blocks we've sent */
+static void
+txpump(Ether *edev)
+{
+	Ctlr* ctlr;
+	Vqueue *q;
+	Block *b;
+	uint idx;
+
+	ctlr = edev->ctlr;
+	q = ctlr->queue[Vtxq];
+
+	ilock(q);
+
+	if(q->nfree > (q->qsize - (q->qsize / 8))){
+		iunlock(q);
+		return;
+	}
+
+	q->avail->flags |= Rnointerrupt;
+	coherence();
+
+	while(q->lastused != q->used->idx){
+		q->lastused++;
+		q->nfree++;
+
+		/* XXX: > 128 */
+		if(q->nfree > q->qsize){
+			pq(q);
+			panic("ethervirtio: txpump: nfree too big");
+		}
+
+		idx = q->lastused & q->qmask;
+		b = q->block[idx];
+		q->block[idx] = nil;
+		freeb(b);
+	}
+
+	q->avail->flags &= ~Rnointerrupt;
+	coherence();
+	iunlock(q);
+}
+
+/*
+ * take one Block from edev->oq and queue it as a chain of two
+ * descriptors: the Vheader, then the packet data.
+ */
+static void
+transmit(Ether *edev)
+{
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vdesc *d;
+	Block *b;
+	int head, block;
+
+	ctlr = edev->ctlr;
+	q = ctlr->queue[Vtxq];
+
+	ilock(q);
+
+	if(q->nfree < 2){
+		iunlock(q);
+		return;
+	}
+
+	b = qget(edev->oq);
+	if(b == nil){
+		iunlock(q);
+		return;
+	}
+
+	head = q->avail->idx & q->qmask;
+
+	/* Vheader descriptor */
+	d = &q->desc[head];
+	d->addr = PCIWADDR(&q->header[head]);
+	d->len = VheaderSize;
+	d->flags = Dnext;
+
+	q->block[head] = nil;
+
+	/* Block descriptor */
+	block = d->next;
+	if(q->block[block] != nil){
+		pq(q);
+		checkb(q->block[block], "about to overwrite");
+		panic("ethervirtio: overrun");
+	}
+
+	q->block[block] = b;
+
+	d = &q->desc[block];
+	d->addr = PCIWADDR(b->rp);
+	d->len = BLEN(b);
+	d->flags = 0;
+
+	q->nfree -= 2;
+
+	coherence();
+	q->avail->idx+=2;
+
+	/* TODO: check notification suppression */
+	outs(ctlr->port+Qnotify, Vtxq);
+
+	iunlock(q);
+}
+
+/* tx kproc: wake on used tx entries or the 2000 tsleep timeout, reclaim Blocks, transmit */
+static void
+txproc(void *v)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+
+	edev = v;
+	ctlr = edev->ctlr;
+
+	while(waserror())
+		;
+
+	for(;;){
+		tsleep(&ctlr->trendez, vhasroom, ctlr->queue[Vtxq], 2000);
+		txpump(edev);
+		transmit(edev);
+	}
+}
+
+/* take one frame from the receive queue */
+static void
+rx1(Ether *edev, Vqueue *q, Vused *u)
+{
+	Block *b;
+	ushort idx;
+
+	idx = q->lastused & q->qmask;
+
+	/* insanity checks */
+	if(u->len > VheaderSize + Mtu)
+		panic("ethervirtio: rx1: too big");
+	if(u->len < VheaderSize)
+		panic("ethervirtio: rx1: too small");
+	if((b = q->block[idx]) == nil)
+		panic("ethervirtio: rx1: nil");
+	if(*((ushort*)b->lim - 3) != 0xF0)
+		panic("ethervirtio: rx1: bad magic");
+
+	checkb(b, "rx1");
+
+	q->block[idx] = nil;
+	b->rp = b->rp + VheaderSize;
+	/* u->len counts the Vheader written in front of the frame */
+	b->wp = b->rp + u->len - VheaderSize;
+
+	etheriq(edev, b, 1);
+
+	q->nfree--;
+}
+
+/* drain the rx used ring through rx1, holding the qlock with Rnointerrupt set meanwhile */
+static void
+receive(Ether *edev)
+{
+	Ctlr* ctlr;
+	Vqueue *q;
+	Vused *u;
+
+	ctlr = edev->ctlr;
+	q = ctlr->queue[Vrxq];
+
+	qlock(ctlr);
+
+	q->avail->flags |= Rnointerrupt;
+	coherence();
+
+	while(hasroom(q)){
+		u = &q->usedent[q->lastused & q->qmask];
+		rx1(edev, q, u);
+		q->lastused++;
+	}
+
+	q->avail->flags &= ~Rnointerrupt;
+	coherence();
+
+	qunlock(ctlr);
+}
+
+/* rx kproc: refill the rx queue, sleep until used entries appear, then receive them */
+static void
+rxproc(void *v)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+
+	edev = v;
+	ctlr = edev->ctlr;
+
+	while(waserror())
+		;
+
+	for(;;){
+		replenish(ctlr);
+		sleep(&ctlr->rrendez, vhasroom, ctlr->queue[Vrxq]);
+		receive(edev);
+	}
+}
+
+/* when bit 0 of Qisr is set, wake the kprocs whose queues have used entries waiting */
+static void
+interrupt(Ureg*, void* arg)
+{
+	Ether *edev;
+	Ctlr* ctlr;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+
+	if(inb(ctlr->port+Qisr) & 1){
+		if(hasroom(ctlr->queue[Vrxq]))
+			wakeup(&ctlr->rrendez);
+		if(hasroom(ctlr->queue[Vtxq]))
+			wakeup(&ctlr->trendez);
+	}
+}
+
+/* on first attach: set the tx free count, start the rx/tx kprocs, set Sdriverok */
+static void
+attach(Ether* edev)
+{
+	char name[KNAMELEN];
+	Ctlr* ctlr;
+	Vqueue *q;
+
+	ctlr = edev->ctlr;
+
+	lock(&ctlr->alock);
+	if(ctlr->state == 0){
+		ctlr->state = 1;
+
+		/* setup tx queue */
+		q = ctlr->queue[Vtxq];
+		/* BUG: this should be qsize? */
+		q->nfree = q->qsize/2;
+
+		/* start kprocs */
+		snprint(name, sizeof name, "#l%drx", edev->ctlrno);
+		kproc(name, rxproc, edev);
+		snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
+		kproc(name, txproc, edev);
+
+		/* ready to go */
+		outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
+	}
+
+	unlock(&ctlr->alock);
+}
+
+/* format feature/status registers and rx/tx queue counters, returned via readstr */
+static long
+ifstat(Ether *edev, void *a, long n, usize offset)
+{
+	char *s, *p, *e;
+	Ctlr *ctlr;
+	Vqueue *q;
+
+	ctlr = edev->ctlr;
+
+	s = p = smalloc(READSTR);
+	e = p+READSTR;
+
+	p = seprint(p, e, "devfeat %.32ub\n", ctlr->feat);
+	p = seprint(p, e, "drvfeat %.32ub\n", inl(ctlr->port+Qdrvfeat));
+	p = seprint(p, e, "devstatus %.8ub\n", inb(ctlr->port+Qstatus));
+	p = seprint(p, e, "isr %.8ub\n", inb(ctlr->port+Qisr));
+	p = seprint(p, e, "netstatus %.8ub\n", inb(ctlr->port+Qnetstatus));
+
+	q = ctlr->queue[Vrxq];
+	p = seprint(p, e, "Vrxq size %d nfree %ud avail->idx %d used->idx %d lastused %hud\n",
+		q->qsize, q->nfree, q->avail->idx, q->used->idx, q->lastused);
+	q = ctlr->queue[Vtxq];
+	p = seprint(p, e, "Vtxq size %d nfree %ud avail->idx %d used->idx %d lastused %hud\n",
+		q->qsize, q->nfree, q->avail->idx, q->used->idx, q->lastused);
+	USED(p);
+
+	n = readstr(offset, a, n, s);
+	free(s);
+
+	return n;
+}
+
+/* XXX: not done */
+static long
+ctl(Ether *, void *, long)
+{
+	return 0;
+}
+
+/* XXX: not done */
+static void
+promiscuous(void *v, int on)
+{
+	USED(v, on);
+}
+
+/* XXX: not done */
+static void
+shutdown(Ether* ether)
+{
+	Ctlr *ctlr;
+
+	ctlr = ether->ctlr;
+
+	outb(ctlr->port+Qstatus, 0);
+}
+
+/* XXX: not done */
+static void
+multicast(void*, uchar*, int)
+{
+}
+
+/* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
+static usize
+queuesize(usize size)
+{
+	return ROUNDUP(VdescSize*size + sizeof(u16int)*(3+size), Virtpgsz)
+		+ ROUNDUP(VusedSize*size + sizeof(u16int)*(3+size), Virtpgsz);
+}
+
+/*
+ * allocate a Vqueue with size descriptors; the rings are carved out
+ * of one Virtpgsz-aligned region.  returns nil if memory runs out.
+ */
+static Vqueue*
+mkqueue(int size)
+{
+	Vqueue *q;
+	uchar *p;
+	int i;
+
+	/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
+	assert(!(size & (size - 1)) && size <= 32768);
+
+	q = mallocz(sizeof(Vqueue), 1);
+	p = mallocalign(queuesize(size), Virtpgsz, 0, 0);
+	if(p == nil || q == nil){
+		print("ethervirtio: no memory for Vqueue\n");
+		free(p);
+		free(q);
+		return nil;
+	}
+
+	q->desc = (void*)p;
+	p += VdescSize*size;
+	q->avail = (void*)p;
+	p += VringSize;
+	q->availent = (void*)p;
+	p += sizeof(u16int)*size;
+	q->availevent = (void*)p;
+	p += sizeof(u16int);
+
+	p = (uchar*)ROUNDUP((uintptr)p, Virtpgsz);
+	q->used = (void*)p;
+	p += VringSize;
+	q->usedent = (void*)p;
+	p += VusedSize*size;
+	q->usedevent = (void*)p;
+
+	q->qsize = size;
+	q->qmask = q->qsize - 1;
+	q->nfree = 0;
+
+	q->lastused = q->used->idx = 0;
+
+	for(i = 0; i < q->qsize; i++){
+		q->availent[i] = i;
+	}
+
+	for(i = 0; i < q->qsize - 1; i++){
+		q->desc[i].next = i + 1;
+	}
+
+	q->desc[q->qsize - 1].next = 0;
+
+	q->block = mallocz(sizeof(void*) * size, 1);
+	q->header = mallocz(VheaderSize * size, 1);
+	if(q->block == nil || q->header == nil){
+		print("ethervirtio: no memory for Vqueue\n");
+		free(q->block);
+		free(q->header);
+		free(q->desc);	/* start of the mallocalign region */
+		free(q);
+		return nil;
+	}
+
+	/*
+	 * disable interrupts. virtio spec says we still get interrupts if
+	 * VnotifyEmpty is set in Drvfeat
+	 * NOTE(review): Rnointerrupt is listed as an avail-ring flag in the
+	 * enum, but it is or'ed into used->flags here -- confirm before changing.
+	 */
+	q->used->flags |= Rnointerrupt;
+
+	return q;
+}
+
+/*
+ * find legacy virtio pci devices whose config word at 0x2E equals typ,
+ * reset and set them up, and append a Ctlr for each to ctlrhead.
+ */
+void
+pciprobe(int typ)
+{
+	Ctlr **ll, *c;
+	Pcidev *p;
+	int n, i;
+
+	ll = &ctlrhead;
+
+	/* §4.1.2 PCI Device Discovery */
+	for(p = nil; p = pcimatch(p, 0, 0);){
+		if(p->vid != 0x1AF4 || p->did < 0x1000 || p->did >= 0x1040)
+			continue;
+		if(p->rid != 0 || pcicfgr16(p, 0x2E) != typ)
+			continue;
+		if((c = malloc(sizeof(Ctlr))) == nil){
+			print("ethervirtio: no memory for Ctlr\n");
+			break;
+		}
+
+		c->port = p->mem[0].bar & ~0x1;
+
+		if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
+			print("ethervirtio: port %ux in use\n", c->port);
+			free(c);
+			continue;
+		}
+
+		c->pcidev = p;
+
+		/* §3.1.2 Legacy Device Initialization */
+		outb(c->port+Qstatus, 0);
+
+		outb(c->port+Qstatus, Sacknowledge|Sdriver);
+
+		c->feat = inl(c->port+Qdevfeat);
+
+		if((c->feat & (Fmac|Fstatus|Fctrlvq)) != (Fmac|Fstatus|Fctrlvq)){
+			print("ethervirtio: feature mismatch %32.32ub\n", c->feat);
+			outb(c->port+Qstatus, Sfailed);
+			iofree(c->port);
+			free(c);
+			continue;
+		}
+
+		outl(c->port+Qdrvfeat, Fmac|Fstatus|Fctrlvq);
+
+		/* part of the 1.0 spec, not used in legacy */
+		/*
+		outb(vd->port+Status, inb(vd->port+Status) | FeatureOk);
+		i = inb(vd->port+Status);
+		if(!(i & FeatureOk)){
+			print("ethervirtio: feature mismatch %32.32lub\n", vd->feat);
+			outb(vd->port+Status, Failed);
+			iofree(vd->port);
+			free(vd);
+			continue;
+		}
+		*/
+
+		/* §4.1.5.1.4 Virtqueue Configuration */
+		for(i=0; i<nelem(c->queue); i++){
+			outs(c->port+Qselect, i);
+			n = ins(c->port+Qsize);	/* 16 bit register; inl would also read Qselect */
+			if(n == 0 || (n & (n-1)) != 0){
+				c->queue[i] = nil;
+				break;
+			}
+			if((c->queue[i] = mkqueue(n)) == nil)
+				break;
+			coherence();
+			outl(c->port+Qaddr, PCIWADDR(c->queue[i]->desc)/Virtpgsz);	/* 32 bit register */
+		}
+		c->nqueue = i;
+
+		/* read virtio mac */
+		for(i = 0; i < Eaddrlen; i++){
+			c->ea[i] = inb(c->port+Qmac+i);
+		}
+
+		*ll = c;
+		ll = &c->next;
+	}
+}
+
+
+/* probe once, then bind edev to the first inactive Ctlr for which ethercfgmatch returns 0; -1 if none */
+static int
+reset(Ether* edev)
+{
+	Ctlr *ctlr;
+	static int once;
+
+	if(once == 0){
+		once = 1;
+		pciprobe(1);
+	}
+
+	for(ctlr = ctlrhead; ; ctlr = ctlr->next){
+		if(ctlr == nil)
+			return -1;
+		if(ctlr->active)
+			continue;
+		if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){
+			ctlr->active = 1;
+			break;
+		}
+	}
+
+	edev->ctlr = ctlr;
+	edev->port = ctlr->port;
+	edev->irq = ctlr->pcidev->intl;
+	edev->tbdf = ctlr->pcidev->tbdf;
+	edev->mbps = 1000;
+	edev->link = 1;
+
+	memmove(edev->ea, ctlr->ea, Eaddrlen);
+
+	edev->arg = edev;
+
+	edev->attach = attach;
+	edev->shutdown = shutdown;
+
+	edev->interrupt = interrupt;
+	edev->transmit = transmit;
+
+	edev->ifstat = ifstat;
+	edev->ctl = ctl;
+	edev->promiscuous = promiscuous;
+	edev->multicast = multicast;
+
+	return 0;
+}
+
+/* register reset() as the "ethervirtio" ether card */
+void
+ethervirtiolink(void)
+{
+	addethercard("ethervirtio", reset);
+}
+