# HG changeset patch # User Erik Quanstrom # Date 1331047023 -3600 # Node ID e5d11ac11abaa21562e0edd3cfe77e1fc351ffc5 # Parent 4c0cd44c79c3305dd5af1e25732f048ad068ca7b ioapic/trap: overloaded rdts and msi interrupts ioapic/trap: allow for overloaded rdts. unfortunately, the default configuration of most ioapics is such that multiple bus interrupts map to a single i/o apic rdt. this has two implications, 1. we must handle chained interrupts, 2. we must not assign the same rdt two different cpu vectors. i don't believe there is any interaction between this cl and the cl that gorka/jim are working on. traps: support pci message-signaled interrupts (msi) add support for msi interrupts. msi interrupts are never chained, and can be much faster than ioapic interrupts; they're a drop-in replacement. in addition, add the type of interrupt to /dev/irqalloc. (e.g. trap, lapic, ioapic, msi). note: a bug was discovered in the trap allocation scheme. regardless of the setting of dfpolicy, all interrupts allocated during startup (e.g. all ethernet) are assigned to core 0, since it is the only active core. it might make sense to redistribute interrupts when cores are brought on or off line. R=nixiedev, nemo.mbox CC=nix-dev http://codereview.appspot.com/5696093 Committer: Francisco J Ballesteros diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/man/3/arch --- a/sys/man/3/arch Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/man/3/arch Tue Mar 06 16:17:03 2012 +0100 @@ -109,9 +109,11 @@ Reads from .I irqalloc return the enabled interrupts, one line per -interrupt. Each line contains three fields separated by white space: -the trap number, the IRQ it is assigned to, and the name of -the device using it. +interrupt. Each line contains six fields separated by white space: +the trap number, the IRQ it is assigned to, the number of +interrupts from this source, the CPU cycles spent servicing this interrupt, +the type of interrupt, and the name of the device using it. 
+Interrupt types are architecture dependent. .PP Reads and writes to .IR iob , diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/apic.h --- a/sys/src/nix/k10/apic.h Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/apic.h Tue Mar 06 16:17:03 2012 +0100 @@ -36,7 +36,7 @@ }; enum { - Nbus = 32, + Nbus = 256, Napic = 254, /* xAPIC architectural limit */ Nrdt = 64, }; @@ -87,3 +87,6 @@ extern void apicdump(void); extern void apictimerenab(void); extern void ioapicdump(void); + +extern int pcimsienable(Pcidev*, uvlong); +extern int pcimsimask(Pcidev*, int); diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/i8259.c --- a/sys/src/nix/k10/i8259.c Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/i8259.c Tue Mar 06 16:17:03 2012 +0100 @@ -195,6 +195,7 @@ v->isr = i8259isr; iunlock(&i8259lock); + v->type = "8259"; return IdtPIC+irq; } diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/io.h --- a/sys/src/nix/k10/io.h Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/io.h Tue Mar 06 16:17:03 2012 +0100 @@ -58,19 +58,25 @@ IdtMAX = 255, }; +typedef struct Vkey { + int tbdf; /* pci: ioapic or msi sources */ + int irq; /* 8259-emulating sources */ +} Vkey; + typedef struct Vctl { Vctl* next; /* handlers on this vector */ int isintr; /* interrupt or fault/trap */ - int irq; + Vkey; /* source-specific key; tbdf for pci */ void (*f)(Ureg*, void*); /* handler to call */ void* a; /* argument to call it with */ - int tbdf; char name[KNAMELEN]; /* of driver */ + char *type; int (*isr)(int); /* get isr bit for this irq */ int (*eoi)(int); /* eoi */ + int (*mask)(Vkey*, int); /* interrupt enable returns masked vector */ int vno; } Vctl; diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/ioapic.c --- a/sys/src/nix/k10/ioapic.c Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/ioapic.c Tue Mar 06 16:17:03 2012 +0100 @@ -7,16 +7,23 @@ #include "apic.h" #include "io.h" +typedef struct Rbus Rbus; +typedef struct Rdt Rdt; -typedef struct Rdt Rdt; -typedef struct Rdt { - 
Apic* apic; +struct Rbus { + Rbus *next; int devno; + Rdt *rdt; +}; + +struct Rdt { + Apic *apic; int intin; u32int lo; - Rdt* next; /* on this bus */ -} Rdt; + int ref; /* could map to multiple busses */ + int enabled; /* times enabled */ +}; enum { /* IOAPIC registers */ Ioregsel = 0x00, /* indirect register address */ @@ -34,7 +41,7 @@ static Rdt rdtarray[Nrdt]; static int nrdtarray; static int gsib; -static Rdt* rdtbus[Nbus]; +static Rbus* rdtbus[Nbus]; static Rdt* rdtvecno[IdtMAX+1]; static Lock idtnolock; @@ -45,18 +52,12 @@ static void rtblget(Apic* apic, int sel, u32int* hi, u32int* lo) { - u32int r; - sel = Ioredtbl + 2*sel; *(apic->addr+Ioregsel) = sel+1; - r = *(apic->addr+Iowin); - if(hi) - *hi = r; + *hi = *(apic->addr+Iowin); *(apic->addr+Ioregsel) = sel; - r = *(apic->addr+Iowin); - if(lo) - *lo = r; + *lo = *(apic->addr+Iowin); } static void @@ -70,9 +71,24 @@ *(apic->addr+Iowin) = lo; } +Rdt* +rdtlookup(Apic *apic, int intin) +{ + int i; + Rdt *r; + + for(i = 0; i < nrdtarray; i++){ + r = rdtarray + i; + if(apic == r->apic && intin == r->intin) + return r; + } + return nil; +} + void ioapicintrinit(int busno, int apicno, int intin, int devno, u32int lo) { + Rbus *rbus; Rdt *rdt; Apic *apic; @@ -82,13 +98,26 @@ if(!apic->useable || intin >= apic->nrdt) return; - rdt = &rdtarray[nrdtarray++]; - rdt->apic = apic; - rdt->devno = devno; - rdt->intin = intin; - rdt->lo = lo; - rdt->next = rdtbus[busno]; - rdtbus[busno] = rdt; + rdt = rdtlookup(apic, intin); + if(rdt == nil){ + rdt = &rdtarray[nrdtarray++]; + rdt->apic = apic; + rdt->intin = intin; + rdt->lo = lo; + }else{ + if(lo != rdt->lo){ + print("mutiple irq botch bus %d %d/%d/%d lo %d vs %d\n", + busno, apicno, intin, devno, lo, rdt->lo); + return; + } + DBG("dup rdt %d %d %d %d %.8ux\n", busno, apicno, intin, devno, lo); + } + rdt->ref++; + rbus = malloc(sizeof *rbus); + rbus->rdt = rdt; + rbus->devno = devno; + rbus->next = rdtbus[busno]; + rdtbus[busno] = rbus; } void @@ -128,35 +157,35 @@ 
ioapicdump(void) { int i, n; + Rbus *rbus; Rdt *rdt; Apic *apic; u32int hi, lo; if(!DBGFLG) return; - for(i = 0; i < Napic; i++){ apic = &xioapic[i]; if(!apic->useable || apic->addr == 0) continue; - DBG("ioapic %d addr %#p nrdt %d gsib %d\n", + print("ioapic %d addr %#p nrdt %d gsib %d\n", i, apic->addr, apic->nrdt, apic->gsib); for(n = 0; n < apic->nrdt; n++){ lock(apic); rtblget(apic, n, &hi, &lo); unlock(apic); - DBG(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); + print(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); } } for(i = 0; i < Nbus; i++){ - if((rdt = rdtbus[i]) == nil) + if((rbus = rdtbus[i]) == nil) continue; - DBG("iointr bus %d:\n", i); - while(rdt != nil){ - DBG(" apic %ld devno %#ux (%d %d) intin %d lo %#ux\n", - rdt->apic-xioapic, rdt->devno, rdt->devno>>2, - rdt->devno & 0x03, rdt->intin, rdt->lo); - rdt = rdt->next; + print("iointr bus %d:\n", i); + for(; rbus != nil; rbus = rbus->next){ + rdt = rbus->rdt; + print(" apic %ld devno %#ux (%d %d) intin %d lo %#ux ref %d\n", + rdt->apic-xioapic, rbus->devno, rbus->devno>>2, + rbus->devno & 0x03, rdt->intin, rdt->lo, rdt->ref); } } } @@ -176,8 +205,7 @@ unlock(apic); } } - if(DBGFLG) - ioapicdump(); + ioapicdump(); } static int dfpolicy = 0; @@ -237,8 +265,70 @@ } int +nextvec(void) +{ + uint vecno; + + lock(&idtnolock); + vecno = idtno; + idtno = (idtno+8) % IdtMAX; + if(idtno < IdtIOAPIC) + idtno += IdtIOAPIC; + unlock(&idtnolock); + + return vecno; +} + +static int +msimask(Vkey *v, int mask) +{ + Pcidev *p; + + p = pcimatchtbdf(v->tbdf); + if(p == nil) + return -1; + return pcimsimask(p, mask); +} + +static int +intrenablemsi(Vctl* v, Pcidev *p) +{ + uint vno, lo, hi; + uvlong msivec; + + vno = nextvec(); + + lo = IPlow | TMedge | vno; + ioapicintrdd(&hi, &lo); + + if(lo & Lm) + lo |= MTlp; + + msivec = (uvlong)hi<<32 | lo; + if(pcimsienable(p, msivec) == -1) + return -1; + v->isr = apicisr; + v->eoi = apiceoi; + v->vno = vno; + v->type = "msi"; + v->mask = msimask; + + DBG("msiirq: %T: enabling 
%.16llux %s irq %d vno %d\n", p->tbdf, msivec, v->name, v->irq, vno); + return vno; +} + +int +disablemsi(Vctl*, Pcidev *p) +{ + if(p == nil) + return -1; + return pcimsimask(p, 1); +} + +int ioapicintrenable(Vctl* v) { + Rbus *rbus; Rdt *rdt; u32int hi, lo; int busno, devno, vecno; @@ -251,6 +341,7 @@ if(v->irq >= IrqLINT0 && v->irq <= MaxIrqLAPIC){ if(v->irq != IrqSPURIOUS) v->isr = apiceoi; + v->type = "lapic"; return v->irq; } else{ @@ -277,6 +368,9 @@ busno = BUSBNO(v->tbdf); if((pcidev = pcimatchtbdf(v->tbdf)) == nil) panic("no PCI dev for tbdf %#8.8ux\n", v->tbdf); + if((vecno = intrenablemsi(v, pcidev)) != -1) + return vecno; + disablemsi(v, pcidev); if((devno = pcicfgr8(pcidev, PciINTP)) == 0) panic("no INTP for tbdf %#8.8ux\n", v->tbdf); devno = BUSDNO(v->tbdf)<<2|(devno-1); @@ -288,10 +382,12 @@ panic("unknown tbdf %#8.8ux\n", v->tbdf); } - for(rdt = rdtbus[busno]; rdt != nil; rdt = rdt->next){ - if(rdt->devno == devno) + rdt = nil; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno){ + rdt = rbus->rdt; break; - } + } if(rdt == nil){ extern int mpisabusno; @@ -304,10 +400,11 @@ if((busno = mpisabusno) == -1) return -1; devno = v->irq<<2; - for(rdt = rdtbus[busno]; rdt != nil; rdt = rdt->next){ - if(rdt->devno == devno) + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno){ + rdt = rbus->rdt; break; - } + } DBG("isa: tbdf %#8.8ux busno %d devno %d %#p\n", v->tbdf, busno, devno, rdt); } @@ -327,18 +424,15 @@ * rather than putting a Lock in each entry. 
*/ lock(rdt->apic); + DBG("%T: %ld/%d/%d (%d)\n", v->tbdf, rdt->apic - xioapic, rbus->devno, rdt->intin, devno); if((rdt->lo & 0xff) == 0){ - lock(&idtnolock); - vecno = idtno; - idtno = (idtno+8) % IdtMAX; - if(idtno < IdtIOAPIC) - idtno += IdtIOAPIC; - unlock(&idtnolock); - + vecno = nextvec(); rdt->lo |= vecno; rdtvecno[vecno] = rdt; - } + }else + DBG("%T: mutiple irq bus %d dev %d\n", v->tbdf, busno, devno); + rdt->enabled++; lo = (rdt->lo & ~Im); ioapicintrdd(&hi, &lo); rtblput(rdt->apic, rdt->intin, hi, lo); @@ -350,6 +444,7 @@ v->isr = apicisr; v->eoi = apiceoi; v->vno = vecno; + v->type = "ioapic"; return vecno; } @@ -377,7 +472,9 @@ } lock(rdt->apic); - rtblput(rdt->apic, rdt->intin, 0, rdt->lo); + rdt->enabled--; + if(rdt->enabled == 0) + rtblput(rdt->apic, rdt->intin, 0, rdt->lo); unlock(rdt->apic); return 0; diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/k8cpu --- a/sys/src/nix/k10/k8cpu Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/k8cpu Tue Mar 06 16:17:03 2012 +0100 @@ -54,7 +54,7 @@ misc +dev # cache - mp apic ioapic pci sipi + mp apic ioapic msi pci sipi # #boot cpu diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/msi.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/src/nix/k10/msi.c Tue Mar 06 16:17:03 2012 +0100 @@ -0,0 +1,117 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "apic.h" + +enum { + Dpcicap = 1<<0, + Dmsicap = 1<<1, + Dvec = 1<<2, + Debug = 0, +}; + +enum { + /* address */ + Msiabase = 0xfee00000u, + Msiadest = 1<<12, /* same as 63:56 of apic vector */ + Msiaedest = 1<<4, /* same as 55:48 of apic vector */ + Msialowpri = 1<<3, /* redirection hint */ + Msialogical = 1<<2, + + /* data */ + Msidlevel = 1<<15, + Msidassert = 1<<14, + Msidlogical = 1<<11, + Msidmode = 1<<8, /* 3 bits; delivery mode */ + Msidvector = 0xff<<0, +}; + +enum{ + /* msi capabilities */ + Vmask = 1<<8, + Cap64 = 1<<7, + Mmesgmsk = 7<<4, + Mmcap = 7<<1, + 
Msienable = 1<<0, +}; + +static int +msicap(Pcidev *p) +{ + int c; + + c = pcicap(p, PciCapMSI); + if(c == -1) + return 0; + return c; +} + +static int +blacklist(Pcidev *p) +{ + switch(p->vid<<16 | p->did){ + case 0x11ab<<16 | 0x6485: + return -1; + } + return 0; +} + +int +pcimsienable(Pcidev *p, uvlong vec) +{ + char *s; + uint c, f, d, datao, lopri, dmode, logical; + + c = msicap(p); + if(c == 0) + return -1; + + f = pcicfgr16(p, c + 2) & ~Mmesgmsk; + + if(blacklist(p) != 0) + return -1; + datao = 8; + d = vec>>48; + lopri = (vec & 0x700) == MTlp; + logical = (vec & Lm) != 0; + pcicfgw32(p, c + 4, Msiabase | Msiaedest * d + | Msialowpri * lopri | Msialogical * logical); + if(f & Cap64){ + datao += 4; + pcicfgw32(p, c + 8, 0); + } + dmode = (vec >> 8) & 7; + pcicfgw16(p, c + datao, Msidassert | Msidlogical * logical + | Msidmode * dmode | (uint)vec & 0xff); + if(f & Vmask) + pcicfgw32(p, c + datao + 4, 0); + + /* leave vectors configured but disabled for debugging */ + if((s = getconf("*nomsi")) != nil && atoi(s) != 0) + return -1; + + pcicfgw16(p, c + 2, f); + return 0; +} + +int +pcimsimask(Pcidev *p, int mask) +{ + uint c, f; + + c = msicap(p); + if(c == 0) + return -1; + f = pcicfgr16(p, c + 2) & ~Msienable; + if(mask){ + pcicfgw16(p, c + 2, f & ~Msienable); +// pciclrbme(p); cheeze + }else{ + pcisetbme(p); + pcicfgw16(p, c + 2, f | Msienable); + } + return 0; +} diff -r 4c0cd44c79c3 -r e5d11ac11aba sys/src/nix/k10/trap.c --- a/sys/src/nix/k10/trap.c Tue Mar 06 14:42:52 2012 +0100 +++ b/sys/src/nix/k10/trap.c Tue Mar 06 16:17:03 2012 +0100 @@ -24,11 +24,12 @@ static Lock vctllock; static Vctl *vctl[256]; -enum -{ - Ntimevec = 20 /* number of time buckets for each intr */ +typedef struct Intrtime Intrtime; +struct Intrtime { + uvlong count; + uvlong cycles; }; -ulong intrtimes[256][Ntimevec]; +static Intrtime intrtimes[256]; void* intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) @@ -62,17 +63,22 @@ return nil; } if(vctl[vno]){ - 
iunlock(&vctllock); - panic("vno %d for %s already allocated by %s\n", - vno, v->name, vctl[vno]->name); + if(vctl[v->vno]->isr != v->isr || vctl[v->vno]->eoi != v->eoi) + panic("intrenable: handler: %s %s %#p %#p %#p %#p", + vctl[v->vno]->name, v->name, + vctl[v->vno]->isr, v->isr, vctl[v->vno]->eoi, v->eoi); } v->vno = vno; + v->next = vctl[vno]; vctl[vno] = v; iunlock(&vctllock); + if(v->mask) + v->mask(v, 0); + /* * Return the assigned vector so intrdisable can find - * the handler; the IRQ is useless in the wondrefule world + * the handler; the IRQ is useless in the wonderful world * of the IOAPIC. */ return v; @@ -81,28 +87,36 @@ int intrdisable(void* vector) { - Vctl *v; + Vctl *v, *x, **ll; extern int ioapicintrdisable(int); ilock(&vctllock); v = vector; if(v == nil || vctl[v->vno] != v) panic("intrdisable: v %#p", v); + for(ll = vctl+v->vno; x = *ll; ll = &x->next) + if(v == x) + break; + if(x != v) + panic("intrdisable: v %#p", v); + if(v->mask) + v->mask(v, 1); + v->f(nil, v->a); + *ll = v->next; ioapicintrdisable(v->vno); - vctl[v->vno] = nil; iunlock(&vctllock); free(v); - return 0; } static long irqallocread(Chan*, void *vbuf, long n, vlong offset) { - char *buf, *p, str[2*(11+1)+KNAMELEN+1+1]; + char *buf, *p, str[2*(11+1)+2*(20+1)+(KNAMELEN+1)+(8+1)+1]; int m, vno; long oldn; + Intrtime *t; Vctl *v; if(n < 0 || offset < 0) @@ -112,7 +126,9 @@ buf = vbuf; for(vno=0; vnonext){ - m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name); + t = intrtimes + vno; + m = snprint(str, sizeof str, "%11d %11d %20llud %20llud %-*.*s %.*s\n", + vno, v->irq, t->count, t->cycles, 8, 8, v->type, KNAMELEN, v->name); if(m <= offset) /* if do not want this, skip entry */ offset -= m; else{ @@ -144,6 +160,7 @@ if(vno < 0 || vno >= 256) panic("trapenable: vno %d\n", vno); v = malloc(sizeof(Vctl)); + v->type = "trap"; v->tbdf = BUSUNKNOWN; v->f = f; v->a = a; @@ -151,8 +168,7 @@ v->name[KNAMELEN-1] = 0; ilock(&vctllock); - if(vctl[vno]) - v->next = 
vctl[vno]->next; + v->next = vctl[vno]; vctl[vno] = v; iunlock(&vctllock); } @@ -229,13 +245,12 @@ }; /* - * keep histogram of interrupt service times + * keep interrupt service times and counts */ void -intrtime(Mach*, int vno) +intrtime(int vno) { - ulong diff; - ulong x; + ulong diff, x; /* should be uvlong */ x = perfticks(); diff = x - m->perf.intrts; @@ -245,10 +260,8 @@ if(up == nil && m->perf.inidle > diff) m->perf.inidle -= diff; - diff /= m->cpumhz*100; // quantum = 100µsec - if(diff >= Ntimevec) - diff = Ntimevec-1; - intrtimes[vno][diff]++; + intrtimes[vno].cycles += diff; + intrtimes[vno].count++; } static void @@ -349,9 +362,8 @@ } if(ctl->eoi) ctl->eoi(vno); + intrtime(vno); if(ctl->isintr){ - intrtime(m, vno); - if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) clockintr = 1; @@ -361,7 +373,7 @@ } else if(vno < nelem(excname) && user){ spllo(); - sprint(buf, "sys: trap: %s", excname[vno]); + snprint(buf, sizeof buf, "sys: trap: %s", excname[vno]); postnote(up, 1, buf, NDebug); } else if(vno >= VectorPIC && vno != VectorSYSCALL){ @@ -379,7 +391,7 @@ iprint("cpu%d: spurious interrupt %d, last %d\n", m->machno, vno, m->lastintr); - + intrtime(vno); if(user) kexit(ureg); return; @@ -394,10 +406,6 @@ } } dumpregs(ureg); -#ifdef notdef - iprint("vno %d: buggeration @ %#p...\n", vno, ureg->ip); - i8042reset(); -#else if(!user){ ureg->sp = PTR2UINT(&ureg->sp); dumpstackwithureg(ureg); @@ -405,7 +413,6 @@ if(vno < nelem(excname)) panic("%s", excname[vno]); panic("unknown trap/intr: %d\n", vno); -#endif /* notdef */ } splhi(); @@ -511,18 +518,19 @@ static void dumpstackwithureg(Ureg* ureg) { + char *s; uintptr l, v, i, estack; extern ulong etext; int x; - if(getconf("*nodumpstack")){ + if((s = getconf("*nodumpstack")) != nil && atoi(s) != 0){ iprint("dumpstack disabled\n"); return; } iprint("dumpstack\n"); x = 0; - x += iprint("ktrace 9k8cpu %#p %#p\n", ureg->ip, ureg->sp); + x += iprint("ktrace 9%s %#p %#p\n", strrchr(conffile, '/')+1, ureg->ip, ureg->sp); i 
= 0; if(up != nil // && (uintptr)&l >= (uintptr)up->kstack