from cinap, experimental support for virtio. Reference: /n/atom/patch/applied/virtioscsik10 Date: Sat Jun 7 06:07:16 CES 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/nix/k10/sdvirtio.c Sat Jun 7 06:06:29 2014 +++ /sys/src/nix/k10/sdvirtio.c Sat Jun 7 06:06:30 2014 @@ -15,6 +15,14 @@ typedef struct Vqueue Vqueue; typedef struct Vdev Vdev; +typedef struct ScsiCfg ScsiCfg; + +/* device types */ +enum { + TypBlk = 2, + TypSCSI = 8, +}; + /* status flags */ enum { Acknowledge = 1, @@ -72,6 +80,10 @@ struct Vqueue { Lock; + + Vdev *dev; + int idx; + int size; int free; @@ -103,9 +115,30 @@ int nqueue; Vqueue *queue[16]; + void *cfg; /* device specific config (for scsi) */ + Vdev *next; }; +enum { + CDBSIZE = 32, + SENSESIZE = 96, +}; + +struct ScsiCfg +{ + u32int num_queues; + u32int seg_max; + u32int max_sectors; + u32int cmd_per_lun; + u32int event_info_size; + u32int sense_size; + u32int cdb_size; + u16int max_channel; + u16int max_target; + u32int max_lun; +}; + static Vqueue* mkvqueue(int size) { @@ -160,13 +193,14 @@ viopnpdevs(int typ) { Vdev *vd, *h, **hh; + Vqueue *q; Pcidev *p; int n, i; h = nil; hh = &h; for(p = nil; p = pcimatch(p, 0x1af4, 0);){ - if((p->did < 0x1000) || (p->did >= 0x1040)) + if((p->did < 0x1000) || (p->did > 0x103F)) continue; if(p->rid != 0) continue; @@ -178,7 +212,7 @@ } vd->port = p->mem[0].bar & ~0x1; if(ioalloc(vd->port, p->mem[0].size, 0, "virtio") < 0){ - print("sdvirtio: port %lux in use\n", vd->port); + print("sdvirtio: port %ux in use\n", vd->port); free(vd); continue; } @@ -195,9 +229,12 @@ n = ins(vd->port+Qsize); if(n == 0 || (n & (n-1)) != 0) break; - if((vd->queue[i] = mkvqueue(n)) == nil) + if((q = mkvqueue(n)) == nil) break; - coherence(); /* ? 
virtual machine can be uncoherent */ + q->dev = vd; + q->idx = i; + vd->queue[i] = q; + coherence(); outl(vd->port+Qaddr, PADDR(vd->queue[i]->desc)/PGSZ); } vd->nqueue = i; @@ -215,39 +252,42 @@ }; static void -viointerrupt(Ureg *, void *arg) +vqinterrupt(Vqueue *q) { int id, free, m; struct Rock *r; Rendez *z; - Vqueue *q; - Vdev *vd; - vd = arg; - if(inb(vd->port+Isr) & 1){ - q = vd->queue[0]; - m = q->size-1; + m = q->size-1; - ilock(q); - while((q->lastused ^ q->used->idx) & m){ - id = q->usedent[q->lastused++ & m].id; - if(r = q->rock[id]){ - q->rock[id] = nil; - z = r->sleep; - r->done = 1; /* hands off */ - if(z != nil) - wakeup(z); - } - do { - free = id; - id = q->desc[free].next; - q->desc[free].next = q->free; - q->free = free; - q->nfree++; - } while(q->desc[free].flags & Next); + ilock(q); + while((q->lastused ^ q->used->idx) & m){ + id = q->usedent[q->lastused++ & m].id; + if(r = q->rock[id]){ + q->rock[id] = nil; + z = r->sleep; + r->done = 1; /* hands off */ + if(z != nil) + wakeup(z); } - iunlock(q); + do { + free = id; + id = q->desc[free].next; + q->desc[free].next = q->free; + q->free = free; + q->nfree++; + } while(q->desc[free].flags & Next); } + iunlock(q); +} + +static void +viointerrupt(Ureg *, void *arg) +{ + Vdev *vd = arg; + + if(inb(vd->port+Isr) & 1) + vqinterrupt(vd->queue[vd->typ == TypSCSI ? 
2 : 0]); } static int @@ -257,7 +297,7 @@ } static int -vioreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba) +vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba) { struct Rock rock; int free, head; @@ -265,7 +305,7 @@ Vdesc *d; u8int status; - struct Vioreqhdr { + struct Vioblkreqhdr { u32int typ; u32int prio; u64int lba; @@ -316,7 +356,7 @@ coherence(); q->availent[q->avail->idx++ & (q->size-1)] = head; coherence(); - outs(vd->port+Qnotify, 0); + outs(vd->port+Qnotify, q->idx); iunlock(q); while(!rock.done){ @@ -326,33 +366,150 @@ poperror(); if(!rock.done) - viointerrupt(nil, vd); + vqinterrupt(q); } return status; } +static int +vioscsireq(SDreq *r) +{ + uchar resp[4+4+2+2+SENSESIZE], req[8+8+3+CDBSIZE]; + struct Rock rock; + int free, head; + u32int len; + Vqueue *q; + Vdesc *d; + Vdev *vd; + SDunit *u; + ScsiCfg *cfg; + + u = r->unit; + vd = u->dev->ctlr; + cfg = vd->cfg; + + memset(resp, 0, sizeof(resp)); + memset(req, 0, sizeof(req)); + req[0] = 1; + req[1] = u->subno; + req[2] = r->lun>>8; + req[3] = r->lun&0xFF; + *(u64int*)(&req[8]) = (uintptr)r; + + memmove(&req[8+8+3], r->cmd, r->clen); + + rock.done = 0; + rock.sleep = &up->sleep; + + q = vd->queue[2]; + ilock(q); + while(q->nfree < 3){ + iunlock(q); + + if(!waserror()) + tsleep(&up->sleep, return0, 0, 500); + poperror(); + + ilock(q); + } + + head = free = q->free; + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(req); + d->len = 8+8+3+cfg->cdb_size; + d->flags = Next; + + if(r->write && r->dlen > 0){ + d = &q->desc[free]; free = d->next; + d->addr = PADDR(r->data); + d->len = r->dlen; + d->flags = Next; + } + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(resp); + d->len = 4+4+2+2+cfg->sense_size; + d->flags = Write; + + if(!r->write && r->dlen > 0){ + d->flags |= Next; + + d = &q->desc[free]; free = d->next; + d->addr = PADDR(r->data); + d->len = r->dlen; + d->flags = Write; + } + + q->free = free; + q->nfree -= 2 + (r->dlen > 0); + + 
q->rock[head] = &rock; + +// coherence(); + q->availent[q->avail->idx++ & (q->size-1)] = head; + coherence(); + outs(vd->port+Qnotify, q->idx); + iunlock(q); + + while(!rock.done){ + while(waserror()) + ; + tsleep(rock.sleep, viodone, &rock, 1000); + poperror(); + + if(!rock.done) + vqinterrupt(q); + } + + /* response+status */ + r->status = resp[10]; + if(resp[11] != 0) + r->status = SDcheck; + + /* sense_len */ + len = *((u32int*)&resp[0]); + if(len > 0){ + if(len > sizeof(r->sense)) + len = sizeof(r->sense); + memmove(r->sense, &resp[4+4+2+2], len); + r->flags |= SDvalidsense; + } + + /* data residue */ + len = *((u32int*)&resp[4]); + if(len > r->dlen) + r->rlen = 0; + else + r->rlen = r->dlen - len; + + return r->status; + +} + static long -viobio(SDunit *u, int, int write, void *a, long count, uvlong lba) +viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba) { long ss, cc, max, ret; Vdev *vd; - max = 32; - ss = u->secsize; vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsibio(u, lun, write, a, count, lba); + max = 32; + ss = u->secsize; ret = 0; while(count > 0){ if((cc = count) > max) cc = max; - if(vioreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0) + if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0) error(Eio); ret += cc*ss; count -= cc; lba += cc; } - return ret; } @@ -362,10 +519,14 @@ int i, count, rw; uvlong lba; SDunit *u; + Vdev *vd; u = r->unit; + vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return vioscsireq(r); if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ - if(vioreq(u->dev->ctlr, 4, nil, 0, 0, 0) != 0) + if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0) return sdsetsense(r, SDcheck, 3, 0xc, 2); return sdsetsense(r, SDok, 0, 0, 0); } @@ -384,6 +545,9 @@ Vdev *vd; vd = u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsionline(u); + cap = inl(vd->port+Devspec+4); cap <<= 32; cap |= inl(vd->port+Devspec); @@ -396,13 +560,26 @@ } static int -vioverify(SDunit *) +vioverify(SDunit *u) { + Vdev *vd; + + vd = 
u->dev->ctlr; + if(vd->typ == TypSCSI) + return scsiverify(u); + return 1; } SDifc sdvirtioifc; +static void +vdevenable(Vdev *vd) +{ + intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, "virtio"); + outb(vd->port+Status, inb(vd->port+Status) | DriverOk); +} + static SDev* viopnp(void) { @@ -413,12 +590,11 @@ id = 'F'; h = nil; hh = &h; - for(vd = viopnpdevs(2); vd != nil; vd = vd->next){ + for(vd = viopnpdevs(TypBlk); vd != nil; vd = vd->next){ if(vd->nqueue != 1) continue; - intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, "virtio"); - outb(vd->port+Status, inb(vd->port+Status) | DriverOk); + vdevenable(vd); if((s = malloc(sizeof(*s))) == nil) break; @@ -426,6 +602,50 @@ s->idno = id++; s->ifc = &sdvirtioifc; s->nunit = 1; + *hh = s; + hh = &s->next; + } + + id = '0'; + for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){ + ScsiCfg *cfg; + + if(vd->nqueue < 3) + continue; + + if((cfg = malloc(sizeof(*cfg))) == nil) + break; + cfg->num_queues = inl(vd->port+Devspec+4*0); + cfg->seg_max = inl(vd->port+Devspec+4*1); + cfg->max_sectors = inl(vd->port+Devspec+4*2); + cfg->cmd_per_lun = inl(vd->port+Devspec+4*3); + cfg->event_info_size = inl(vd->port+Devspec+4*4); + cfg->sense_size = inl(vd->port+Devspec+4*5); + cfg->cdb_size = inl(vd->port+Devspec+4*6); + cfg->max_channel = ins(vd->port+Devspec+4*7); + cfg->max_target = ins(vd->port+Devspec+4*7+2); + cfg->max_lun = inl(vd->port+Devspec+4*8); + + if(cfg->max_target == 0){ + free(cfg); + continue; + } + if((cfg->cdb_size > CDBSIZE) || (cfg->sense_size > SENSESIZE)){ + print("sdvirtio: cdb %ud or sense size %ud too big\n", + cfg->cdb_size, cfg->sense_size); + free(cfg); + continue; + } + vd->cfg = cfg; + + vdevenable(vd); + + if((s = malloc(sizeof(*s))) == nil) + break; + s->ctlr = vd; + s->idno = id++; + s->ifc = &sdvirtioifc; + s->nunit = cfg->max_target; *hh = s; hh = &s->next; } --- /sys/src/nix/k10/cpu Sat Jun 7 06:06:32 2014 +++ /sys/src/nix/k10/cpu Sat Jun 7 06:06:33 2014 @@ -16,7 +16,7 @@ kprof # 
segment acpi - ws + ws2 ether netif cec @@ -46,7 +46,8 @@ ether8169 pci ethermii # ether82557 pci - ether82563 pci +# ether82563 pci + ether82563x pci etherbcm pci ethermii etherigbe pci ethermii @@ -56,8 +57,11 @@ etheri40 pci + etherusb + ethermedium -# loopbackmedium +# must have for venti (please fix venti) + loopbackmedium # netdevmedium usbuhci usbohci @@ -70,11 +74,12 @@ sdodin sdscsifis sdatafis sdvanir sdscsifis sdatafis sdmpt2 sdscsifis sdatafis + sdvirtio sdscsi sdloop misc +dev -# mp mpacpi lapic ioapic msi pci sipi - acpi acpiio lapic ioapic msi pci sipi +# mp mpacpi lapic ioapic msi msix pci sipi + acpi acpiio lapic ioapic msi msix pci sipi boot cpu il --- /sys/src/nix/k10/term Sat Jun 7 06:06:34 2014 +++ /sys/src/nix/k10/term Sat Jun 7 06:06:35 2014 @@ -17,7 +17,7 @@ # segment acpi audio - ws + ws2 ether netif cec @@ -82,11 +82,12 @@ sdodin sdscsifis sdatafis sdvanir sdscsifis sdatafis sdmpt2 sdscsifis sdatafis + sdvirtio sdscsi sdloop misc +dev # mp mpacpi lapic ioapic msi pci sipi - acpi acpiio lapic ioapic msi pci sipi + acpi acpiio lapic ioapic msi msix pci sipi boot terminal il --- /sys/src/nix/port/sdscsi.c Sat Jun 7 06:06:37 2014 +++ /sys/src/nix/port/sdscsi.c Sat Jun 7 06:06:38 2014 @@ -303,56 +303,6 @@ return 0; } -int -scsiexec(SDunit* unit, int write, uchar* cmd, int clen, void* data, int* dlen) -{ - SDreq *r; - int status; - - if((r = malloc(sizeof(SDreq))) == nil) - return SDmalloc; - r->unit = unit; - r->lun = cmd[1]>>5; /* ??? */ - r->write = write; - memmove(r->cmd, cmd, clen); - r->clen = clen; - r->data = data; - if(dlen) - r->dlen = *dlen; - r->flags = 0; - - r->status = ~0; - - /* - * Call the device-specific I/O routine. - * There should be no calls to 'error()' below this - * which percolate back up. - */ - switch(status = unit->dev->ifc->rio(r)){ - case SDok: - if(dlen) - *dlen = r->rlen; - /*FALLTHROUGH*/ - case SDcheck: - /*FALLTHROUGH*/ - default: - /* - * It's more complicated than this. 
There are conditions - * which are 'ok' but for which the returned status code - * is not 'SDok'. - * Also, not all conditions require a reqsense, might - * need to do a reqsense here and make it available to the - * caller somehow. - * - * Mañana. - */ - break; - } - sdfree(r); - - return status; -} - static void scsifmt10(SDreq *r, int write, int lun, ulong nb, uvlong bno) {