amd64 port of the fs kernel. this port supports multiprocessing. acpi is used rather than the mp tables for discovering ioapics &c. the underpinnings of the amd64 port are very close to the nix kernel. in fact the recent nix support for acpi came from this kernel. Reference: /n/atom/patch/applied2013/fsamd64 Date: Tue Aug 27 16:14:02 CES 2013 Signed-off-by: quanstro@quanstro.net --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,402 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" + +enum { + Development = 1, /* i.e., debugging */ + DLE = 0x10, /* ^p == DLE */ + Asciimask = 0x7f, +}; + +/* + * INS8250 uart + */ +enum +{ + /* + * register numbers + */ + Data= 0, /* xmit/rcv buffer */ + Iena= 1, /* interrupt enable */ + Ircv= (1<<0), /* for char rcv'd */ + Ixmt= (1<<1), /* for xmit buffer empty */ + Irstat=(1<<2), /* for change in rcv'er status */ + Imstat=(1<<3), /* for change in modem status */ + Istat= 2, /* interrupt flag (read) */ + Fenabd=(3<<6), /* on if fifo's enabled */ + Fifoctl=2, /* fifo control (write) */ + Fena= (1<<0), /* enable xmit/rcv fifos */ + Ftrig= (1<<6), /* trigger after 4 input characters */ + Fclear=(3<<1), /* clear xmit & rcv fifos */ + Format= 3, /* byte format */ + Bits8= (3<<0), /* 8 bits/byte */ + Stop2= (1<<2), /* 2 stop bits */ + Pena= (1<<3), /* generate parity */ + Peven= (1<<4), /* even parity */ + Pforce=(1<<5), /* force parity */ + Break= (1<<6), /* generate a break */ + Dra= (1<<7), /* address the divisor */ + Mctl= 4, /* modem control */ + Dtr= (1<<0), /* data terminal ready */ + Rts= (1<<1), /* request to send */ + Ri= (1<<2), /* ring */ + Inton= (1<<3), /* turn on interrupts */ + Loop= (1<<4), /* loop back */ + Lstat= 5, /* line status */ + Inready=(1<<0), /* receive buffer full */ + Oerror=(1<<1), /* receiver overrun */ + Perror=(1<<2), /* receiver parity error */ + Ferror=(1<<3), /* rcv framing error */ + Outready=(1<<5), /* output buffer empty */ + Mstat= 6, /* 
modem status */ + Ctsc= (1<<0), /* clear to send changed */ + Dsrc= (1<<1), /* data set ready changed */ + Rire= (1<<2), /* rising edge of ring indicator */ + Dcdc= (1<<3), /* data carrier detect changed */ + Cts= (1<<4), /* complement of clear to send line */ + Dsr= (1<<5), /* complement of data set ready line */ + Ring= (1<<6), /* complement of ring indicator line */ + Dcd= (1<<7), /* complement of data carrier detect line */ + Scratch=7, /* scratchpad */ + Dlsb= 0, /* divisor lsb */ + Dmsb= 1, /* divisor msb */ + + Serial= 0, + Modem= 1, +}; + +typedef struct Uart Uart; +struct Uart +{ + int port; /* i/o port base; zero until the uart is set up */ + uchar sticky[8]; /* sticky write register values */ + int nofifo; /* set once enabling the fifos has failed */ + + void (*rx)(int); /* routine to take a received character */ + int (*tx)(void); /* routine to get a character to transmit */ + + uint frame; /* framing errors seen */ + uint overrun; /* receiver overruns seen */ +}; + +/* externally-visible console-on-a-uart flag */ +int uartcons; + +Uart uart[2]; + +#define UartFREQ 1843200 + +/* + * register access relative to the uart's port base. + * uartwrreg ORs the sticky bits kept for register r into the + * value written. every argument is parenthesised so that + * expressions may be passed for u, r and v. + */ +#define uartwrreg(u,r,v) outb((u)->port + (r), (u)->sticky[(r)] | (v)) +#define uartrdreg(u,r) inb((u)->port + (r)) + +/* + * set the baud rate by calculating and setting the baudrate + * generator constant. This will work with fairly non-standard + * baud rates. + * Dra is raised to reach the divisor bytes and dropped again by + * rewriting Format with only its sticky bits. + * a rate that is not positive is ignored rather than divided by. + */ +static void +uartsetbaud(Uart *up, int rate) +{ + uint brconst; + + if(rate <= 0) + return; + + brconst = (UartFREQ+8*rate-1)/(16*rate); + + uartwrreg(up, Format, Dra); + outb(up->port+Dmsb, (brconst>>8) & 0xff); + outb(up->port+Dlsb, brconst & 0xff); + uartwrreg(up, Format, 0); +} + +/* + * set (n non-zero) or clear DTR in the sticky modem control + * bits and write them to the chip + */ +static void +uartdtr(Uart *up, int n) +{ + if(n) + up->sticky[Mctl] |= Dtr; + else + up->sticky[Mctl] &= ~Dtr; + uartwrreg(up, Mctl, 0); +} + +/* + * set (n non-zero) or clear RTS in the sticky modem control + * bits and write them to the chip + */ +static void +uartrts(Uart *up, int n) +{ + if(n) + up->sticky[Mctl] |= Rts; + else + up->sticky[Mctl] &= ~Rts; + uartwrreg(up, Mctl, 0); +} + +/* + * Enable/disable FIFOs (if possible). 
+ * Does nothing once the chip has been marked nofifo. + * The fifos are always cleared and drained; they are turned on + * only when n is non-zero, and nofifo is set if Istat then fails + * to show Fenabd. Runs between splhi() and splx(). + */ +static void +uartfifo(Uart *up, int n) +{ + int i; + Mpl s; + + if(up->nofifo) + return; + + s = splhi(); + + /* reset fifos */ + uartwrreg(up, Fifoctl, Fclear); + + /* empty buffer and interrupt conditions */ + for(i = 0; i < 16; i++){ + uartrdreg(up, Istat); + uartrdreg(up, Data); + } + + /* turn on fifo */ + if(n){ + uartwrreg(up, Fifoctl, Fena|Ftrig); + + if((uartrdreg(up, Istat) & Fenabd) == 0){ + /* didn't work, must be an earlier chip type */ + up->nofifo = 1; + } + } + + splx(s); +} + +/* + * transmit routine that never has anything to send: + * uartintr takes -1 from a tx routine to mean no character. + */ +static int +uartshift(void) +{ + return -1; +} + +/* + * interrupt handler; arg is the Uart to service. + * each pass reads Istat and handles the condition it reports, + * returning once an unrecognised code has bit 0 set. + * received characters are masked with Asciimask before being + * given to rx; with Development set, a received DLE (^p) + * calls firmware(). panics if 1024 passes do not reach a return. + */ +static void +uartintr(Ureg *ur, void *arg) +{ + Uart *up; + int ch; + int s, l, loops; + + USED(ur); + + up = arg; + for(loops = 0; loops < 1024; loops++){ + s = uartrdreg(up, Istat); + switch(s & 0x3F){ + case 6: /* receiver line status */ + l = uartrdreg(up, Lstat); + if(l & Ferror) + up->frame++; + if(l & Oerror) + up->overrun++; + break; + + case 4: /* received data available */ + case 12: + ch = inb(up->port+Data); + if (Development && (ch & Asciimask) == DLE) + firmware(); + if(up->rx) + (*up->rx)(ch & Asciimask); + break; + + case 2: /* transmitter empty */ + ch = -1; + if(up->tx) + ch = (*up->tx)(); + if(ch != -1) /* -1: the tx routine had nothing to send */ + outb(up->port+Data, ch); + break; + + case 0: /* modem status */ + uartrdreg(up, Mstat); + break; + + default: + if(s&1) /* bit 0 set: taken to mean nothing is left to service */ + return; + print("weird modem interrupt #%2.2ux\n", s); + break; + } + } + panic("uartintr: 0x%2.2ux\n", uartrdreg(up, Istat)); +} + +/* + * turn on a port's interrupts. 
set DTR and RTS + * and turn the fifos on. Ixmt is enabled only if a tx routine + * is set, Ircv and Irstat only if an rx routine is set. + */ +static void +uartenable(Uart *up) +{ + /* + * turn on interrupts + */ + up->sticky[Iena] = 0; + if(up->tx) + up->sticky[Iena] |= Ixmt; + if(up->rx) + up->sticky[Iena] |= Ircv|Irstat; + + /* + * turn on DTR and RTS + */ + uartdtr(up, 1); + uartrts(up, 1); + uartfifo(up, 1); + + uartwrreg(up, Iena, 0); +} + +/* + * set up uart[0] on port 0 (i/o base 0x3F8, IrqUART0) or + * port 1 (0x2F8, IrqUART1) and set uartcons. + * does nothing if uart[0] already has a port or if port is + * any other value. rx takes received characters; a nil tx is + * replaced by uartshift. the line starts at 115200 baud and + * is switched to baud if that is non-zero. + */ +void +uartspecial(int port, void (*rx)(int), int (*tx)(void), int baud) +{ + Uart *up = &uart[0]; + + if(up->port) + return; + + switch(port){ + case 0: + up->port = 0x3F8; + intrenable(IrqUART0, uartintr, up, BUSUNKNOWN, "eia0"); + break; + + case 1: + up->port = 0x2F8; + intrenable(IrqUART1, uartintr, up, BUSUNKNOWN, "eia1"); + break; + + default: + return; + } + + /* + * set rate to 115200 baud. + * 8 bits/character. + * 1 stop bit. + * interrupts enabled. + */ + uartsetbaud(up, 115200); + up->sticky[Format] = Bits8; + uartwrreg(up, Format, 0); + up->sticky[Mctl] |= Inton; + uartwrreg(up, Mctl, 0x0); + + if(tx == nil) + tx = uartshift; + + up->rx = rx; + up->tx = tx; + uartenable(up); + if(baud) + uartsetbaud(up, baud); + uartcons = 1; +} + +/* + * polled read from uart[0]: the received byte if conf.useuart + * is set and Lstat shows Inready, otherwise 0. + */ +int +uartgetc(void) +{ + Uart *up = &uart[0]; + + if(conf.useuart && uartrdreg(up, Lstat) & Inready) + return inb(up->port+Data); + return 0; +} + +/* + * polled write to uart[0]; ignored unless conf.useuart is set. + * polls Lstat for Outready up to 100 times, microdelay(100) + * apart, then writes c whether or not Outready was seen. + */ +void +uartputc(int c) +{ + Uart *up = &uart[0]; + int i; + + if(conf.useuart == 0) + return; + + for(i = 0; i < 100; i++){ + if(uartrdreg(up, Lstat) & Outready) + break; + microdelay(100); + } + outb(up->port+Data, c); +} + +/* + * write the n bytes at s with uartputc + */ +void +uartputs(char *s, int n) +{ + int i; + + for(i = 0; i < n; i++) + uartputc(s[i]); +} + +/* + * as uartspecial, but for uart[1]: starts at 9600 baud, + * leaves a nil tx alone and does not touch uartcons. + */ +void +uartspecial1(int port, void (*rx)(int), int (*tx)(void), int baud) +{ + Uart *up = &uart[1]; + + if(up->port) + return; + + switch(port){ + + case 0: + up->port = 0x3F8; + intrenable(IrqUART0, uartintr, up, BUSUNKNOWN, "eia0"); + break; + + case 1: + up->port = 0x2F8; + intrenable(IrqUART1, uartintr, up, BUSUNKNOWN, "eia1"); + break; + + default: + return; + } + + /* + * set rate to 9600 baud. + * 8 bits/character. + * 1 stop bit. 
+ * interrupts enabled. + */ + uartsetbaud(up, 9600); + up->sticky[Format] = Bits8; + uartwrreg(up, Format, 0); + up->sticky[Mctl] |= Inton; + uartwrreg(up, Mctl, 0x0); + + up->rx = rx; + up->tx = tx; + uartenable(up); + if(baud) + uartsetbaud(up, baud); +} + +/* + * polled read from uart[1]: the received byte if Lstat shows + * Inready, otherwise 0. + */ +int +uartgetc1(void) +{ + Uart *up = &uart[1]; + + if(uartrdreg(up, Lstat) & Inready) + return inb(up->port+Data); + return 0; +} + +/* + * polled write to uart[1]: polls Lstat for Outready up to 100 + * times, delay(1) apart, then writes c whether or not Outready + * was seen. + */ +void +uartputc1(int c) +{ + Uart *up = &uart[1]; + int i; + + for(i = 0; i < 100; i++){ + if(uartrdreg(up, Lstat) & Outready) + break; + delay(1); + } + outb(up->port+Data, c); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,572 @@ +#include "all.h" +#include "io.h" +#include "mp.h" +#include "apic.h" +#include <aml.h> /* NOTE(review): the header name was lost in extraction; aml.h is presumed from the aml* calls in this file - confirm */ + +typedef struct Rsd Rsd; +typedef struct Tbl Tbl; + +struct Rsd { + uchar sig[8]; + uchar csum; + uchar oemid[6]; + uchar rev; + uchar raddr[4]; + uchar len[4]; + uchar xaddr[8]; + uchar xcsum; + uchar reserved[3]; +}; + +struct Tbl { + uchar sig[4]; + uchar len[4]; + uchar rev; + uchar csum; + uchar oemid[6]; + uchar oemtid[8]; + uchar oemrev[4]; + uchar cid[4]; + uchar crev[4]; + uchar data[]; +}; + +enum { + Tblsz = 4+4+1+1+6+8+4+4+4, /* bytes in a Tbl ahead of data[] */ + Rdsz = 8+1+6+1+4+4+8+1+3, /* bytes in an Rsd */ +}; + +static Rsd *rsd; +static int ntblpa; /* physical addresses visited by maptable() */ +static uintmem tblpa[64]; +static int ntblmap; /* successfully mapped tables */ +static Tbl *tblmap[64]; + +/* + * debugging print, compiled out. wrapped in do ... while(0) so + * that DBG(...); is one statement wherever it is used, including + * as the body of an if that has an else. + */ +#define DBG(...) do{ if(0) print(__VA_ARGS__); }while(0) + +/* + * sum of the n bytes at v, truncated to a byte (s is a uchar); + * the callers in this file treat zero as a good checksum. + */ +static int +checksum(void *v, int n) +{ + uchar *p, s; + + s = 0; + p = v; + while(n-- > 0) + s += *p++; + return s; +} + +/* + * typical leakage ~40kb. 
+ * memory comes from ialloc(n, 0) and is never given back: + * amlfree() below does nothing. + */ +void* +amlalloc(usize n) +{ + void *p; + + if((p = ialloc(n, 0)) != nil){ + // setmalloctag(&p, getcallerpc(&n)); + // setrealloctag(&p, 0); + } + return p; +} + +void +amlfree(void *p) +{ + USED(p); +// free(p); +} + +/* 2-, 4- and 8-byte fields read with getle() */ +#define get16(p) getle((p), 2) +#define get32(p) getle((p), 4) +#define get64(p) getle((p), 8) + +extern uvlong getle(uchar*, int); + +/* + * bytes of data[] in t: the table's len field less the Tblsz header + */ +static uint +tbldlen(Tbl *t) +{ + return get32(t->len) - Tblsz; +} + +/* + * return the first mapped table whose 4-byte signature equals + * sig, or nil if there is none. + */ +static Tbl* +findtable(void *sig) +{ + int i; + /* NOTE(review): the loop head was lost in extraction ("<...>" stripped) and is restored from the use of tblmap/ntblmap - check against the original patch */ + for(i=0; i<ntblmap; i++) + if(memcmp(tblmap[i]->sig, sig, 4) == 0) + return tblmap[i]; + return nil; +} + + +/* + * map the table at physical address xpa and remember it in tblmap. + * an address that is zero, does not fit in uintmem or was visited + * before is ignored, as is a table shorter than Tblsz or one whose + * bytes do not sum to zero. + * the 4-byte entries of an RSDT, the 8-byte entries of an XSDT and + * the addresses at offsets 40 and 140 of a FACP are mapped in turn. + * argument is uvlong to prevent caller from caring + */ +static void +maptable(uvlong xpa) +{ + uchar *p, *e; + int i; + uintmem pa; + u32int l; + Tbl *t; + + pa = xpa; + if(pa != xpa || pa == 0) + return; + if(ntblpa >= nelem(tblpa) || ntblmap >= nelem(tblmap)) + return; + /* NOTE(review): from this loop to the read of t->len the text was lost in extraction ("<...>" stripped); it is reconstructed from the declarations above and the vunmap(t, 8) below - check against the original patch */ + for(i=0; i<ntblpa; i++) + if(pa == tblpa[i]) + return; + tblpa[ntblpa++] = pa; + if((t = vmap(pa, 8)) == nil) + return; + l = get32(t->len); + if(l < Tblsz){ + vunmap(t, 8); + return; + } + vunmap(t, 8); + if((t = vmap(pa, l)) == nil) + return; + if(checksum(t, l)){ + vunmap(t, l); + return; + } + tblmap[ntblmap++] = t; + + p = (uchar*)t; + e = p + l; + if(memcmp("RSDT", t->sig, 4) == 0){ + for(p = t->data; p+3 < e; p += 4) + maptable(get32(p)); + return; + } + if(memcmp("XSDT", t->sig, 4) == 0){ + for(p = t->data; p+7 < e; p += 8) + maptable(get64(p)); + return; + } + if(memcmp("FACP", t->sig, 4) == 0){ + if(l < 44) + return; + maptable(get32(p + 40)); + if(l < 148) + return; + maptable(get64(p + 140)); + return; + } +} + +/* + * look for signature at every 16th byte of the len bytes at addr; + * returns a pointer to the match, or nil. + */ +static void* +rsdscan(uchar* addr, int len, char* signature) +{ + int sl; + uchar *e, *p; + + e = addr+len; + sl = strlen(signature); + for(p = addr; p+sl < e; p += 16){ + if(memcmp(p, signature, sl)) + continue; + return p; + } + + return nil; +} + +static void* +rsdsearch(char* signature) +{ + uintptr p; + uchar *bda; + Rsd *rsd; + + /* + * Search for the data structure signature: + * 1) in the first KB of the EBDA; + * 2) in the BIOS ROM between 0xE0000 and 0xFFFFF. 
+ */ + if(strncmp((char*)KADDR(0xFFFD9), "EISA", 4) == 0){ + bda = BIOSSEG(0x40); + /* + * 40:0E holds the EBDA as a real-mode segment, so it is + * converted with BIOSSEG like the other segments here + * rather than used as a physical address. + */ + if((p = (bda[0x0F]<<8)|bda[0x0E])){ + if(rsd = rsdscan(BIOSSEG(p), 1024, signature)) + return rsd; + } + } + return rsdscan(BIOSSEG(0xE000), 0x20000, signature); +} + +/* + * find the root pointer ("RSD PTR ") and leave it in rsd. + * panics if it is not found, or if neither its first 20 bytes + * nor its first 36 bytes sum to zero. + */ +static void +loadrsd(void) +{ + if((rsd = rsdsearch("RSD PTR ")) == nil) + panic("acpi: no rsd ptr"); + if(checksum(rsd, 20) && checksum(rsd, 36)) + panic("acpi: acpi checksum"); +} + +/* + * map the tables reachable from the root pointer. returns early + * if maptable() has already visited or mapped anything. + * raddr is followed if the 20-byte checksum is good; for rev >= 2 + * xaddr is followed too if the 36-byte checksum is good. + */ +static void +maptables(void) +{ + loadrsd(); + if(ntblmap > 0 || ntblpa > 0) + return; + if(!checksum(rsd, 20)) + maptable(get32(rsd->raddr)); + if(rsd->rev >= 2) + if(!checksum(rsd, 36)) + maptable(get64(rsd->xaddr)); +} + +#define Lintr Localintr +enum { + Iointr, + Lintr, + + MTint = 0, /* fake interrupt type, equivalent to fixed */ +}; + +/* + * build the low half of an LVT or RDT entry for an interrupt of + * type inttype from source src (Iointr or Lintr), aimed at input + * intin of apic apicno. returns 0 after printing a message if an + * argument is rejected. + */ +static u32int +apicmkintr(uint src, uint inttype, int polarity, int trigger, uint apicno, uint intin) +{ + u32int v; + Apic *apic; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. 
+ */ + if(apicno != 0xff){ + if(Napic < 256 && apicno >= Napic){ + print("apic: id out-of-range: %d\n", apicno); + return 0; /* 0 is the failure return throughout */ + } + switch(src){ + default: /* src is neither Iointr nor Lintr */ + print("apic: intin botch: %d\n", intin); + return 0; + case Iointr: /* the ioapic must exist and intin must be below its nrdt */ + if((apic = ioapiclookup(apicno)) == nil){ + print("ioapic%d: ioapic unusable\n", apicno); + return 0; + } + if(intin >= apic->nrdt){ + print("ioapic%d: intin %d >= nrdt %d\n", apicno, intin, apic->nrdt); + return 0; + } + break; + case Lintr: /* the lapic must exist and intin must index its lvt */ + if((apic = lapiclookup(apicno)) == nil){ + print("lapic%d: lapic unusable\n", apicno); + return 0; + } + if(intin >= nelem(apic->lvt)){ + print("lapic%d: intin beyond lvt: %d\n", apicno, intin); + return 0; + } + USED(apic); + break; + } + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(inttype){ + default: + print("apic: bad irq type %d\n", inttype); + return 0; + case MTint: /* INT (fake type, same as fixed) */ + v |= polarity | trigger; + break; + case MTnmi: /* NMI */ + case MTsmi: /* SMI */ + case MTei: /* ExtINT */ + v |= TMedge|IPhigh|inttype; + break; + } + + return v; +} + +/* + * polarity from bits 0-1 of flags: 1 is IPhigh, 3 is IPlow, + * 2 is rejected (-1) and 0 takes the default for bustype + * (ISA IPhigh, PCI IPlow, anything else -1). + */ +int +flagstopolarity(int bustype, int flags) +{ + switch(flags & 3){ + case 1: + return IPhigh; + case 3: + return IPlow; + case 2: + return -1; + } + switch(bustype){ + case BusISA: + return IPhigh; + case BusPCI: + return IPlow; + break; + default: + return -1; + } +} + +/* + * trigger mode from bits 2-3 of flags, the field that follows + * polarity in the MPS INTI flags of an ACPI interrupt source + * override: 1 is TMedge, 3 is TMlevel, 2 is rejected (-1) and + * 0 takes the default for bustype (ISA TMedge, PCI TMlevel, + * anything else -1). + */ +int +flagstotrigger(int bustype, int flags) +{ + switch((flags>>2) & 3){ + case 1: + return TMedge; + case 3: + return TMlevel; + case 2: + return -1; + } + switch(bustype){ + case BusISA: + return TMedge; + case BusPCI: + return TMlevel; + break; + default: + return -1; + } +} + +/* + * route global system interrupt gsi to irq on bus bustype.busno. + * flags carry polarity and trigger as decoded by flagstopolarity() + * and flagstotrigger(); 0 asks for the bus defaults. + * a gsi with no apic, undecodable flags, or an entry rejected by + * apicmkintr() is reported and dropped. + */ +static void +addirq(int gsi, int bustype, int busno, int irq, int flags) +{ + uint apicno, intin, polarity, trigger; + u32int i; + + if((apicno = gsitoapicid(gsi, &intin)) == -1){ + print("acpi: addirq: no apic for gsi %d bus %d.%d\n", gsi, bustype, busno); + return; + } + DBG("addirq: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI? "pci": "isa", busno, irq, flags); + polarity = flagstopolarity(bustype, flags); + trigger = flagstotrigger(bustype, flags); + if(polarity == -1 || trigger == -1){ + print("addirq: bad polarity: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI? "pci": "isa", busno, irq, flags); + return; + } + + i = apicmkintr(Iointr, MTint, polarity, trigger, apicno, intin); + if(i == 0) /* rejected; apicmkintr has already said why */ + return; +#ifndef MPS + ioapicintrinit(bustype, busno, apicno, intin, irq, i); +#endif +} + +/* + * string form of a _HID value. a string is returned as it is; + * an integer is byte-swapped and unpacked into three letters + * (5 bits each, counted from '@') and four hex digits. + * the result is in a static buffer that the next call reuses. + */ +static char* +eisaid(void *v) +{ + uint b, l; + int i; + static char id[8]; + + if(amltag(v) == 's') + return v; + b = amlint(v); + for(l = 0, i=24; i>=0; i -= 8, b >>= 8) + l |= (b & 0xFF) << i; + id[7] = 0; + for(i=6; i>=3; i--, l >>= 4) + id[i] = "0123456789ABCDEF"[l & 0xF]; + for(i=2; i>=0; i--, l >>= 5) + id[i] = '@' + (l & 0x1F); + return id; +} + +/* + * pci bus number for aml node dot, or -1. + * adr comes from ^_BBN or, failing that, ^_ADR. if ^_HID names a + * root bridge (PNP0A03 or PNP0A08) adr is the answer; otherwise the + * parent's bus is found by recursion and the answer is the bus + * behind the bridge at that bus and adr. + */ +static int +pcibusno(void *dot) +{ + int bno, adr, tbdf; + Pcidev *pdev; + void *p, *x; + char *id; + + id = nil; + if(x = amlwalk(dot, "^_HID")){ + p = nil; + if(amleval(x, "", &p) == 0) + id = eisaid(p); + } + if((x = amlwalk(dot, "^_BBN")) == nil) + if((x = amlwalk(dot, "^_ADR")) == nil) + return -1; + p = nil; + if(amleval(x, "", &p) < 0) + return -1; + adr = amlint(p); + /* if root bridge, then we are done here */ + if(id != nil && (strcmp(id, "PNP0A03")==0 || strcmp(id, "PNP0A08")==0)) + return adr; + x = amlwalk(dot, "^"); + if(x == nil || x == dot) + return -1; + if((bno = pcibusno(x)) < 0) + return -1; + tbdf = MKBUS(BusPCI, bno, adr>>16, adr&0xFFFF); + pdev = pcimatchtbdf(tbdf); + if(pdev == nil){ + DBG("acpi: pcibusno: bridge not found: %τ\n", tbdf); + return -1; + } + if(pdev->bridge == nil){ + DBG("acpi: pcibusno: nothing bridged: %τ\n", tbdf); + return -1; + } + return BUSBNO(pdev->bridge->tbdf); +} + +/* + * called for a _PRT object: adds an irq for each entry that is a + * 4-element package with a zero source (b[2]). always returns 1. + */ +static int +enumprt(void *dot, void *) +{ + void *p, **a, **b; + int bno, dno, pin, gsi; + int n, i; + + bno = pcibusno(dot); + if(bno < 0){ + DBG("enumprt: pci not found %V\n", dot); + return 1; + } + + /* evaluate _PRT method */ + p = nil; + if(amleval(dot, "", &p) < 0) + return 1; + if(amltag(p) != 'p') + return 1; + + n = amllen(p); + a = amlval(p); + /* NOTE(review): from the loop head to the shift of b[0] the text was lost in extraction ("<...>" stripped); the element checks are reconstructed from the declarations above and the uses of b[0..3] below - check against the original patch */ + for(i=0; i<n; i++){ + if(amltag(a[i]) != 'p') + continue; + if(amllen(a[i]) != 4) + continue; + b = amlval(a[i]); + dno = amlint(b[0])>>16; + pin = amlint(b[1]); + if(amltag(b[2]) == 'N' || amlint(b[2]) != 0){ + print("enumprt: interrupt link not handled %V\n", b[2]); + 
continue; + } + gsi = amlint(b[3]); + addirq(gsi, BusPCI, bno, (dno<<2)|pin, 0); + } + return 1; +} + +/* + * amlload the data of mapped tables whose signature is name: + * only the first one if all is zero, otherwise every one. + */ +static void +loadtbls(char *name, int all) +{ + int i; + Tbl *t; + + for(i = 0; i < ntblmap; i++){ + t = tblmap[i]; + if(memcmp(t->sig, name, 4) == 0){ + amlload(t->data, tbldlen(t)); + if(!all) + break; + } + } +} + +enum { + Lapicen = 1, +}; + +/* + * map the acpi tables, load the DSDT and every SSDT, evaluate + * _PIC with argument 1, then walk the entries of the APIC table: + * enabled local apics are initialised while conf.nmach is below + * maxmach, io apics are initialised, and interrupt source + * overrides are added as isa irqs. pci routing is then taken from + * the _PRT objects and isa irqs 0-15 are added identity mapped. + * panics if there is no APIC table. + */ +void +acpiinit(int maxmach) +{ + uchar *p, *e; + int i, c, nmach; + uintmem lapicbase; + Tbl *t; + + print("acpiinit\n"); + maptables(); + amlinit(); + loadtbls("DSDT", 0); + loadtbls("SSDT", 1); + + /* set APIC mode */ + amleval(amlwalk(amlroot, "_PIC"), "i", 1, nil); + if((t = findtable("APIC")) == nil) + panic("acpiinit: no APIC table"); + + p = t->data; + e = p + tbldlen(t); + lapicbase = get32(p); + p += 8; + + nmach = 0; + /* + * each entry starts with a type byte and a length byte; + * both must lie inside the table before the length is read. + */ + for(; p+1 < e; p += c){ + c = p[1]; + if(c < 2 || (p+c) > e) + break; + switch(*p){ + case 0x00: /* Processor Local APIC */ + if(p[4] & Lapicen && conf.nmach < maxmach){ + lapicinit(p[3], lapicbase, nmach==0); + conf.nmach = ++nmach; + } + break; + case 0x01: /* I/O APIC */ + ioapicinit(p[2], get32(p+8), get32(p+4)); + break; + case 0x02: /* Interrupt Source Override */ + addirq(get32(p+4), BusISA, 0, p[3], get16(p+8)); + break; + case 0x03: /* NMI Source */ + print("acpi: ignoring nmi source\n"); + break; + case 0x04: /* Local APIC NMI */ + DBG("acpi: lapic nmi %.2ux flags %.4ux lint# %d (ignored)\n", + p[2], (uint)get16(p+3), p[5]); + break; + case 0x05: /* Local APIC Address Override */ + case 0x06: /* I/O SAPIC */ + case 0x07: /* Local SAPIC */ + case 0x08: /* Platform Interrupt Sources */ + case 0x09: /* Processor Local x2APIC */ + case 0x0A: /* x2APIC NMI */ + case 0x0B: /* GIC */ + case 0x0C: /* GICD */ + print("acpi: ignoring entry: %.2ux\n", *p); + break; + } + } + + /* look for PCI interrupt mappings */ + amlenum(amlroot, "_PRT", enumprt, nil); + + /* add identity mapped legacy isa interrupts */ + for(i=0; i<16; i++) + addirq(i, BusISA, 0, i, 0); + + /* free the AML interpreter */ + 
amlexit(); + + print("acpiinit: %d maches\n", nmach); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,341 @@ +/* + * advanced host controller interface (sata) + * © 2007-9 coraid, inc + */ + +/* pci configuration */ +enum { + Abar = 5, +}; + +/* + * ahci memory configuration + * + * 0000-0023 generic host control + * 0024-009f reserved + * 00a0-00ff vendor specific. + * 0100-017f port 0 + * ... + * 1080-10ff port 31 + */ + +/* cap bits: supported features */ +enum { + H64a = 1<<31, /* 64-bit addressing */ + Hncq = 1<<30, /* ncq */ + Hsntf = 1<<29, /* snotification reg. */ + Hmps = 1<<28, /* mech pres switch */ + Hss = 1<<27, /* staggered spinup */ + Halp = 1<<26, /* aggressive link pm */ + Hal = 1<<25, /* activity led */ + Hclo = 1<<24, /* command-list override */ + Hiss = 1<<20, /* for interface speed */ + Ham = 1<<18, /* ahci-mode only */ + Hpm = 1<<17, /* port multiplier */ + Hfbs = 1<<16, /* fis-based switching */ + Hpmb = 1<<15, /* multiple-block pio */ + Hssc = 1<<14, /* slumber state */ + Hpsc = 1<<13, /* partial-slumber state */ + Hncs = 1<<8, /* n command slots */ + Hcccs = 1<<7, /* command completion coalescing */ + Hems = 1<<6, /* enclosure mgmt. 
*/ + Hxs = 1<<5, /* external sata */ + Hnp = 1<<0, /* n ports */ +}; + +/* ghc bits */ +enum { + Hae = 1<<31, /* enable ahci */ + Hie = 1<<1, /* " interrupts */ + Hhr = 1<<0, /* hba reset */ +}; + +/* cap2 bits */ +enum { + Apts = 1<<2, /* automatic partial to slumber */ + Nvmp = 1<<1, /* nvmhci present; nvram */ + Boh = 1<<0, /* bios/os handoff supported */ +}; + +/* emctl bits */ +enum { + Pm = 1<<27, /* port multiplier support */ + Alhd = 1<<26, /* activity led hardware driven */ + Xonly = 1<<25, /* rx messages not supported */ + Smb = 1<<24, /* single msg buffer; rx limited */ + Esgpio = 1<<19, /* sgpio messages supported */ + Eses2 = 1<<18, /* ses-2 supported */ + Esafte = 1<<17, /* saf-te supported */ + Elmt = 1<<16, /* led msg types support */ + Emrst = 1<<9, /* reset all em logic */ + Tmsg = 1<<8, /* transmit message */ + Mr = 1<<0, /* message rx'd */ + Emtype = Esgpio | Eses2 | Esafte | Elmt, +}; + +/* bios bits */ +enum { + Bb = 1<<4, /* bios cleaning up for change */ + Ooc = 1<<3, /* os ownership change */ + Sooe = 1<<2, /* smi on ownership change enable */ + Oos = 1<<1, /* os owned semaphore */ + Bos = 1<<0, /* bios owned semaphore */ +}; + +/* hba-wide registers, starting with cap and ghc */ +typedef struct { + u32int cap; + u32int ghc; + u32int isr; + u32int pi; /* ports implemented */ + u32int ver; + u32int ccc; /* coalescing control */ + u32int cccports; + u32int emloc; + u32int emctl; + u32int cap2; + u32int bios; +} Ahba; + +enum { + Acpds = 1<<31, /* cold port detect status */ + Atfes = 1<<30, /* task file error status */ + Ahbfs = 1<<29, /* hba fatal */ + Ahbds = 1<<28, /* hba error (parity error) */ + Aifs = 1<<27, /* interface fatal §6.1.2 */ + Ainfs = 1<<26, /* interface error (recovered) */ + Aofs = 1<<24, /* too many bytes from disk */ + Aipms = 1<<23, /* incorrect prt mul status */ + Aprcs = 1<<22, /* PhyRdy change status Pxserr.diag.n */ + Adpms = 1<<7, /* mechanical presence status */ + Apcs = 1<<6, /* port connect diag.x */ + Adps = 1<<5, /* descriptor processed */ + Aufs = 1<<4, /* 
unknown fis diag.f */ + Asdbs = 1<<3, /* set device bits fis received w/ i bit set */ + Adss = 1<<2, /* dma setup */ + Apio = 1<<1, /* pio setup fis */ + Adhrs = 1<<0, /* device to host register fis */ + + /* NOTE(review): Ahbds is listed twice in IEM; harmless in an OR, but confirm that no other bit was meant */ + IEM = Acpds|Atfes|Ahbds|Ahbfs|Ahbds|Aifs|Ainfs|Aprcs|Apcs|Adps| + Aufs|Asdbs|Adss|Adhrs, + Ifatal = Atfes|Ahbfs|Ahbds|Aifs, +}; + +/* serror bits */ +enum { + SerrX = 1<<26, /* exchanged */ + SerrF = 1<<25, /* unknown fis */ + SerrT = 1<<24, /* transition error */ + SerrS = 1<<23, /* link sequence */ + SerrH = 1<<22, /* handshake */ + SerrC = 1<<21, /* crc */ + SerrD = 1<<20, /* not used by ahci */ + SerrB = 1<<19, /* 10b-to-8b decode */ + SerrW = 1<<18, /* comm wake */ + SerrI = 1<<17, /* phy internal */ + SerrN = 1<<16, /* phyrdy change */ + + ErrE = 1<<11, /* internal */ + ErrP = 1<<10, /* ata protocol violation */ + ErrC = 1<<9, /* communication */ + ErrT = 1<<8, /* transient */ + ErrM = 1<<1, /* recovered comm */ + ErrI = 1<<0, /* recovered data integrity */ + + ErrAll = ErrE|ErrP|ErrC|ErrT|ErrM|ErrI, + SerrAll = SerrX|SerrF|SerrT|SerrS|SerrH|SerrC|SerrD|SerrB|SerrW| + SerrI|SerrN|ErrAll, + SerrBad = 0x7f<<19, +}; + +/* cmd register bits */ +enum { + Aicc = 1<<28, /* interface communications control. 4 bits */ + Aasp = 1<<27, /* aggressive slumber & partial sleep */ + Aalpe = 1<<26, /* aggressive link pm enable */ + Adlae = 1<<25, /* drive led on atapi */ + Aatapi = 1<<24, /* device is atapi */ + Apste = 1<<23, /* automatic slumber to partial cap */ + Afbsc = 1<<22, /* fis-based switching capable */ + Aesp = 1<<21, /* external sata port */ + Acpd = 1<<20, /* cold presence detect */ + Ampsp = 1<<19, /* mechanical pres. 
*/ + Ahpcp = 1<<18, /* hot plug capable */ + Apma = 1<<17, /* pm attached */ + Acps = 1<<16, /* cold presence state */ + Acr = 1<<15, /* cmdlist running */ + Afr = 1<<14, /* fis running */ + Ampss = 1<<13, /* mechanical presence switch state */ + Accs = 1<<8, /* current command slot 12:08 */ + Afre = 1<<4, /* fis enable receive */ + Aclo = 1<<3, /* command list override */ + Apod = 1<<2, /* power on dev (requires cold-pres. detect) */ + Asud = 1<<1, /* spin-up device; requires ss capability */ + Ast = 1<<0, /* start */ + + Arun = Ast|Acr|Afre|Afr, + Apwr = Apod|Asud, +}; + +/* sctl register bits */ +enum { + Aipm = 1<<8, /* interface power mgmt. 3=off */ + Aspd = 1<<4, + Adet = 1<<0, /* device detection */ +}; + +/* sstatus register bits */ +enum{ + /* sstatus det */ + Smissing = 0<<0, + Spresent = 1<<0, + Sphylink = 3<<0, + Sbist = 4<<0, + Smask = 7<<0, + + /* sstatus speed */ + Gmissing = 0<<4, + Gi = 1<<4, + Gii = 2<<4, + Giii = 3<<4, + Gmask = 7<<4, + + /* sstatus ipm */ + Imissing = 0<<8, + Iactive = 1<<8, + Isleepy = 2<<8, + Islumber = 6<<8, + Imask = 7<<8, + + SImask = Smask | Imask, + SSmask = Smask | Isleepy, +}; + +#define sstatus scr0 +#define sctl scr2 +#define serror scr1 +#define sactive scr3 +#define ntf scr4 + +typedef struct { + u32int list; /* PxCLB must be 1kb aligned */ + u32int listhi; + u32int fis; /* 256-byte aligned */ + u32int fishi; + u32int isr; + u32int ie; /* interrupt enable */ + u32int cmd; + u32int res1; + u32int task; + u32int sig; + u32int scr0; + u32int scr2; + u32int scr1; + u32int scr3; + u32int ci; /* command issue */ + u32int scr4; + u32int fbs; + u32int res2[11]; + u32int vendor[4]; +} Aport; + +/* in host's memory; not memory mapped */ +typedef struct { + uchar *base; + uchar *d; + uchar *p; + uchar *r; + uchar *u; + u32int *devicebits; +} Afis; + +enum { + Lprdtl = 1<<16, /* physical region descriptor table len */ + Lpmp = 1<<12, /* port multiplier port */ + Lclear = 1<<10, /* clear busy on R_OK */ + Lbist = 1<<9, + Lreset 
= 1<<8, + Lpref = 1<<7, /* prefetchable */ + Lwrite = 1<<6, + Latapi = 1<<5, + Lcfl = 1<<0, /* command fis length in double words */ +}; + +/* in hosts memory; memory mapped */ +typedef struct { + u32int flags; + u32int len; + u32int ctab; + u32int ctabhi; + uchar reserved[16]; +} Alist; + +typedef struct { + u32int dba; + u32int dbahi; + u32int pad; + u32int count; +} Aprdt; + +typedef struct { + uchar cfis[0x40]; + uchar atapi[0x10]; + uchar pad[0x30]; + Aprdt prdt; +} Actab; + +/* enclosure message header */ +enum { + Mled = 0, + Msafte = 1, + Mses2 = 2, + Msgpio = 3, +}; + +enum { + Ledmsz = 8, +}; + +typedef struct { + uchar dummy; + uchar msize; + uchar dsize; + uchar type; + uchar hba; /* bits 0:4 are the port */ + uchar pm; + uchar led[2]; +} Aledmsg; + +enum { + Aled = 1<<0, + Locled = 1<<3, + Errled = 1<<6, + + Ledoff = 0, + Ledon = 1, +}; + +typedef struct { + uint encsz; + u32int *enctx; + u32int *encrx; +} Aenc; + +enum { + Ferror = 1, + Fdone = 2, +}; + +typedef struct { + QLock; + Rendez; + uchar flag; + Sfis; + Afis fis; + Alist *list; + Actab *ctab; +} Aportm; + +typedef struct { + Aport *p; + Aportm *m; +} Aportc; --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,209 @@ +enum { /* Cr0 */ + Pe = 0x00000001, /* Protected Mode Enable */ + Mp = 0x00000002, /* Monitor Coprocessor */ + Em = 0x00000004, /* Emulate Coprocessor */ + Ts = 0x00000008, /* Task Switched */ + Et = 0x00000010, /* Extension Type */ + Ne = 0x00000020, /* Numeric Error */ + Wp = 0x00010000, /* Write Protect */ + Am = 0x00040000, /* Alignment Mask */ + Nw = 0x20000000, /* Not Writethrough */ + Cd = 0x40000000, /* Cache Disable */ + Pg = 0x80000000, /* Paging Enable */ +}; + +enum { /* Cr3 */ + Pwt = 0x00000008, /* Page-Level Writethrough */ + Pcd = 0x00000010, /* Page-Level Cache Disable */ +}; + +enum { /* Cr4 */ + Vme = 0x00000001, /* Virtual-8086 Mode Extensions */ + Pvi = 0x00000002, /* Protected Mode Virtual Interrupts */ + 
Tsd = 0x00000004, /* Time-Stamp Disable */ + De = 0x00000008, /* Debugging Extensions */ + Pse = 0x00000010, /* Page-Size Extensions */ + Pae = 0x00000020, /* Physical Address Extension */ + Mce = 0x00000040, /* Machine Check Enable */ + Pge = 0x00000080, /* Page-Global Enable */ + Pce = 0x00000100, /* Performance Monitoring Counter Enable */ + Osfxsr = 0x00000200, /* FXSAVE/FXRSTOR Support */ + Osxmmexcpt = 0x00000400, /* Unmasked Exception Support */ +}; + +enum { /* cpuid fn 1 dx */ + Pat = 1<<16, /* page table attributes (memory type control */ +}; + +enum { /* Rflags */ + Cf = 0x00000001, /* Carry Flag */ + Pf = 0x00000004, /* Parity Flag */ + Af = 0x00000010, /* Auxiliary Flag */ + Zf = 0x00000040, /* Zero Flag */ + Sf = 0x00000080, /* Sign Flag */ + Tf = 0x00000100, /* Trap Flag */ + If = 0x00000200, /* Interrupt Flag */ + Df = 0x00000400, /* Direction Flag */ + Of = 0x00000800, /* Overflow Flag */ + Iopl0 = 0x00000000, /* I/O Privilege Level */ + Iopl1 = 0x00001000, + Iopl2 = 0x00002000, + Iopl3 = 0x00003000, + Nt = 0x00004000, /* Nested Task */ + Rf = 0x00010000, /* Resume Flag */ + Vm = 0x00020000, /* Virtual-8086 Mode */ + Ac = 0x00040000, /* Alignment Check */ + Vif = 0x00080000, /* Virtual Interrupt Flag */ + Vip = 0x00100000, /* Virtual Interrupt Pending */ + Id = 0x00200000, /* ID Flag */ +}; + +enum { /* MSRs */ + PerfEvtbase = 0xc0010000, /* Performance Event Select */ + PerfCtrbase = 0xc0010004, /* Performance Counters */ + + Efer = 0xc0000080, /* Extended Feature Enable */ + Star = 0xc0000081, /* Legacy Target IP and [CS]S */ + Lstar = 0xc0000082, /* Long Mode Target IP */ + Cstar = 0xc0000083, /* Compatibility Target IP */ + Sfmask = 0xc0000084, /* SYSCALL Flags Mask */ + FSbase = 0xc0000100, /* 64-bit FS Base Address */ + GSbase = 0xc0000101, /* 64-bit GS Base Address */ + KernelGSbase = 0xc0000102, /* SWAPGS instruction */ +}; + +enum { /* Efer */ + Sce = 0x00000001, /* System Call Extension */ + Lme = 0x00000100, /* Long Mode Enable */ + Lma 
= 0x00000400, /* Long Mode Active */ + Nxe = 0x00000800, /* No-Execute Enable */ + Svme = 0x00001000, /* SVM Extension Enable */ + Ffxsr = 0x00004000, /* Fast FXSAVE/FXRSTOR */ +}; + +enum { /* PML4E/PDPE/PDE/PTE */ + PteP = 0x0000000000000001ull,/* Present */ + PteRW = 0x0000000000000002ull,/* Read/Write */ + PteU = 0x0000000000000004ull,/* User/Supervisor */ + PtePWT = 0x0000000000000008ull,/* Page-Level Write Through */ + PtePCD = 0x0000000000000010ull,/* Page Level Cache Disable */ + PteA = 0x0000000000000020ull,/* Accessed */ + PteD = 0x0000000000000040ull,/* Dirty */ + PtePS = 0x0000000000000080ull,/* Page Size */ + Pte4KPAT = PtePS, /* PTE PAT */ + PteG = 0x0000000000000100ull,/* Global */ + Pte2MPAT = 0x0000000000001000ull,/* PDE PAT */ + Pte1GPAT = Pte2MPAT, /* PDPE PAT */ + PteNX = 0x8000000000000000ull,/* No Execute */ +}; + +enum { + PATUC = 0, /* uncachable */ + PATWC = 1, /* use write-combining buffers */ + PATWT = 4, /* write-through */ + PATWP = 5, /* write protect */ + PATWB = 6, /* write back */ + PATUCMINUS = 7, /* UC-; strongly uncacheable */ +}; + +enum { /* Exceptions */ + IdtDE = 0, /* Divide-by-Zero Error */ + IdtDB = 1, /* Debug */ + IdtNMI = 2, /* Non-Maskable-Interrupt */ + IdtBP = 3, /* Breakpoint */ + IdtOF = 4, /* Overflow */ + IdtBR = 5, /* Bound-Range */ + IdtUD = 6, /* Invalid-Opcode */ + IdtNM = 7, /* Device-Not-Available */ + IdtDF = 8, /* Double-Fault */ + Idt09 = 9, /* unsupported */ + IdtTS = 10, /* Invalid-TSS */ + IdtNP = 11, /* Segment-Not-Present */ + IdtSS = 12, /* Stack */ + IdtGP = 13, /* General-Protection */ + IdtPF = 14, /* Page-Fault */ + Idt0F = 15, /* reserved */ + IdtMF = 16, /* x87 FPE-Pending */ + IdtAC = 17, /* Alignment-Check */ + IdtMC = 18, /* Machine-Check */ + IdtXF = 19, /* SIMD Floating-Point */ +}; + +/* + * Vestigial Segmented Virtual Memory. 
+ */ +enum { /* Segment Descriptor */ + SdISTM = 0x0000000700000000ull,/* Interrupt Stack Table Mask */ + SdA = 0x0000010000000000ull,/* Accessed */ + SdR = 0x0000020000000000ull,/* Readable (Code) */ + SdW = 0x0000020000000000ull,/* Writeable (Data) */ + SdE = 0x0000040000000000ull,/* Expand Down */ + SdaTSS = 0x0000090000000000ull,/* Available TSS */ + SdbTSS = 0x00000b0000000000ull,/* Busy TSS */ + SdCG = 0x00000c0000000000ull,/* Call Gate */ + SdIG = 0x00000e0000000000ull,/* Interrupt Gate */ + SdTG = 0x00000f0000000000ull,/* Trap Gate */ + SdCODE = 0x0000080000000000ull,/* Code/Data */ + SdS = 0x0000100000000000ull,/* System/User */ + SdDPL0 = 0x0000000000000000ull,/* Descriptor Privilege Level */ + SdDPL1 = 0x0000200000000000ull, + SdDPL2 = 0x0000400000000000ull, + SdDPL3 = 0x0000600000000000ull, + SdP = 0x0000800000000000ull,/* Present */ + Sd4G = 0x000f00000000ffffull,/* 4G Limit */ + SdL = 0x0020000000000000ull,/* Long Attribute */ + SdD = 0x0040000000000000ull,/* Default Operand Size */ + SdG = 0x0080000000000000ull,/* Granularity */ +}; + +/* + * Performance Counter Configuration + */ +enum { /* Performance Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +enum { /* Segment Selector */ + SsRPL0 = 0x0000, /* Requestor Privilege Level */ + SsRPL1 = 0x0001, + SsRPL2 = 0x0002, + 
SsRPL3 = 0x0003, + SsTIGDT = 0x0000, /* GDT Table Indicator */ + SsTILDT = 0x0004, /* LDT Table Indicator */ + SsSIM = 0xfff8, /* Selector Index Mask */ +}; + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +enum { + SiNULL = 0, /* NULL selector index */ + SiCS = 1, /* CS selector index */ + SiDS = 2, /* DS selector index */ + SiU32CS = 3, /* User CS selector index */ + SiUDS = 4, /* User DS selector index */ + SiUCS = 5, /* User CS selector index */ + SiFS = 6, /* FS selector index */ + SiGS = 7, /* GS selector index */ + SiTSS = 8, /* TSS selector index */ +}; + +/* + * Extern registers. + */ +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,162 @@ +#define Pe 0x00000001 /* Protected Mode Enable */ +#define Mp 0x00000002 /* Monitor Coprocessor */ +#define Em 0x00000004 /* Emulate Coprocessor */ +#define Ts 0x00000008 /* Task Switched */ +#define Et 0x00000010 /* Extension Type */ +#define Ne 0x00000020 /* Numeric Error */ +#define Wp 0x00010000 /* Write Protect */ +#define Am 0x00040000 /* Alignment Mask */ +#define Nw 0x20000000 /* Not Writethrough */ +#define Cd 0x40000000 /* Cache Disable */ +#define Pg 0x80000000 /* Paging Enable */ + +#define Pwt 0x00000008 /* Page-Level Writethrough */ +#define Pcd 0x00000010 /* Page-Level Cache Disable */ + +#define Vme 0x00000001 /* Virtual-8086 Mode Extensions */ +#define Pvi 0x00000002 /* Protected Mode Virtual Interrupts */ +#define Tsd 0x00000004 /* Time-Stamp Disable */ +#define De 0x00000008 /* Debugging Extensions */ +#define Pse 0x00000010 /* Page-Size Extensions */ +#define Pae 0x00000020 /* Physical Address Extension */ +#define Mce 0x00000040 /* Machine Check Enable */ +#define Pge 0x00000080 /* Page-Global Enable */ +#define Pce 0x00000100 /* Performance Monitoring Counter Enable */ +#define Osfxsr 0x00000200 /* FXSAVE/FXRSTOR Support */ +#define Osxmmexcpt 0x00000400 /* 
Unmasked Exception Support */ + +#define Cf 0x00000001 /* Carry Flag */ +#define Pf 0x00000004 /* Parity Flag */ +#define Af 0x00000010 /* Auxiliary Flag */ +#define Zf 0x00000040 /* Zero Flag */ +#define Sf 0x00000080 /* Sign Flag */ +#define Tf 0x00000100 /* Trap Flag */ +#define If 0x00000200 /* Interrupt Flag */ +#define Df 0x00000400 /* Direction Flag */ +#define Of 0x00000800 /* Overflow Flag */ +#define Iopl0 0x00000000 /* I/O Privilege Level */ +#define Iopl1 0x00001000 +#define Iopl2 0x00002000 +#define Iopl3 0x00003000 +#define Nt 0x00004000 /* Nested Task */ +#define Rf 0x00010000 /* Resume Flag */ +#define Vm 0x00020000 /* Virtual-8086 Mode */ +#define Ac 0x00040000 /* Alignment Check */ +#define Vif 0x00080000 /* Virtual Interrupt Flag */ +#define Vip 0x00100000 /* Virtual Interrupt Pending */ +#define Id 0x00200000 /* ID Flag */ + +#define PerfEvtbase 0xc0010000 /* Performance Event Select */ +#define PerfCtrbase 0xc0010004 /* Performance Counters */ + +#define Efer 0xc0000080 /* Extended Feature Enable */ +#define Star 0xc0000081 /* Legacy Target IP and [CS]S */ +#define Lstar 0xc0000082 /* Long Mode Target IP */ +#define Cstar 0xc0000083 /* Compatibility Target IP */ +#define Sfmask 0xc0000084 /* SYSCALL Flags Mask */ +#define FSbase 0xc0000100 /* 64-bit FS Base Address */ +#define GSbase 0xc0000101 /* 64-bit GS Base Address */ +#define KernelGSbase 0xc0000102 /* SWAPGS instruction */ + +#define Sce 0x00000001 /* System Call Extension */ +#define Lme 0x00000100 /* Long Mode Enable */ +#define Lma 0x00000400 /* Long Mode Active */ +#define Nxe 0x00000800 /* No-Execute Enable */ +#define Svme 0x00001000 /* SVM Extension Enable */ +#define Ffxsr 0x00004000 /* Fast FXSAVE/FXRSTOR */ + +#define PteP 0x0000000000000001ull /* Present */ +#define PteRW 0x0000000000000002ull /* Read/Write */ +#define PteU 0x0000000000000004ull /* User/Supervisor */ +#define PtePWT 0x0000000000000008ull /* Page-Level Write Through */ +#define PtePCD 0x0000000000000010ull /* 
Page Level Cache Disable */ +#define PteA 0x0000000000000020ull /* Accessed */ +#define PteD 0x0000000000000040ull /* Dirty */ +#define PtePS 0x0000000000000080ull /* Page Size */ +#define Pte4KPAT PtePS /* PTE PAT */ +#define PteG 0x0000000000000100ull /* Global */ +#define Pte2MPAT 0x0000000000001000ull /* PDE PAT */ +#define Pte1GPAT Pte2MPAT /* PDPE PAT */ +#define PteNX 0x8000000000000000ull /* No Execute */ + +#define IdtDE 0 /* Divide-by-Zero Error */ +#define IdtDB 1 /* Debug */ +#define IdtNMI 2 /* Non-Maskable-Interrupt */ +#define IdtBP 3 /* Breakpoint */ +#define IdtOF 4 /* Overflow */ +#define IdtBR 5 /* Bound-Range */ +#define IdtUD 6 /* Invalid-Opcode */ +#define IdtNM 7 /* Device-Not-Available */ +#define IdtDF 8 /* Double-Fault */ +#define Idt09 9 /* unsupported */ +#define IdtTS 10 /* Invalid-TSS */ +#define IdtNP 11 /* Segment-Not-Present */ +#define IdtSS 12 /* Stack */ +#define IdtGP 13 /* General-Protection */ +#define IdtPF 14 /* Page-Fault */ +#define Idt0F 15 /* reserved */ +#define IdtMF 16 /* x87 FPE-Pending */ +#define IdtAC 17 /* Alignment-Check */ +#define IdtMC 18 /* Machine-Check */ +#define IdtXF 19 /* SIMD Floating-Point */ + +#define SdISTM 0x0000000700000000ull /* Interrupt Stack Table Mask */ +#define SdA 0x0000010000000000ull /* Accessed */ +#define SdR 0x0000020000000000ull /* Readable (Code) */ +#define SdW 0x0000020000000000ull /* Writeable (Data) */ +#define SdE 0x0000040000000000ull /* Expand Down */ +#define SdaTSS 0x0000090000000000ull /* Available TSS */ +#define SdbTSS 0x00000b0000000000ull /* Busy TSS */ +#define SdCG 0x00000c0000000000ull /* Call Gate */ +#define SdIG 0x00000e0000000000ull /* Interrupt Gate */ +#define SdTG 0x00000f0000000000ull /* Trap Gate */ +#define SdCODE 0x0000080000000000ull /* Code/Data */ +#define SdS 0x0000100000000000ull /* System/User */ +#define SdDPL0 0x0000000000000000ull /* Descriptor Privilege Level */ +#define SdDPL1 0x0000200000000000ull +#define SdDPL2 0x0000400000000000ull 
+#define SdDPL3 0x0000600000000000ull +#define SdP 0x0000800000000000ull /* Present */ +#define Sd4G 0x000f00000000ffffull /* 4G Limit */ +#define SdL 0x0020000000000000ull /* Long Attribute */ +#define SdD 0x0040000000000000ull /* Default Operand Size */ +#define SdG 0x0080000000000000ull /* Granularity */ + +#define PeHo 0x0000020000000000ull /* Host only */ +#define PeGo 0x0000010000000000ull /* Guest only */ +#define PeEvMskH 0x0000000f00000000ull /* Event mask H */ +#define PeCtMsk 0x00000000ff000000ull /* Counter mask */ +#define PeInMsk 0x0000000000800000ull /* Invert mask */ +#define PeCtEna 0x0000000000400000ull /* Counter enable */ +#define PeInEna 0x0000000000100000ull /* Interrupt enable */ +#define PePnCtl 0x0000000000080000ull /* Pin control */ +#define PeEdg 0x0000000000040000ull /* Edge detect */ +#define PeOS 0x0000000000020000ull /* OS mode */ +#define PeUsr 0x0000000000010000ull /* User mode */ +#define PeUnMsk 0x000000000000ff00ull /* Unit Mask */ +#define PeEvMskL 0x00000000000000ffull /* Event Mask L */ + +#define PeEvMsksh 32ull /* Event mask shift */ + +#define SsRPL0 0x0000 /* Requestor Privilege Level */ +#define SsRPL1 0x0001 +#define SsRPL2 0x0002 +#define SsRPL3 0x0003 +#define SsTIGDT 0x0000 /* GDT Table Indicator */ +#define SsTILDT 0x0004 /* LDT Table Indicator */ +#define SsSIM 0xfff8 /* Selector Index Mask */ + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +#define SiNULL 0 /* NULL selector index */ +#define SiCS 1 /* CS selector index */ +#define SiDS 2 /* DS selector index */ +#define SiU32CS 3 /* User CS selector index */ +#define SiUDS 4 /* User DS selector index */ +#define SiUCS 5 /* User CS selector index */ +#define SiFS 6 /* FS selector index */ +#define SiGS 7 /* GS selector index */ +#define SiTSS 8 /* TSS selector index */ + +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,98 @@ +/* + 
* There are 2 flavours of APIC, Local APIC and IOAPIC, + * Each I/O APIC has a unique physical address, + * Local APICs are all at the same physical address as they can only be + * accessed by the local CPU. APIC ids are unique to the + * APIC type, so an IOAPIC and APIC both with id 0 is ok. + */ +typedef struct Ioapic Ioapic; +typedef struct Lapic Lapic; +typedef struct Apic Apic; + +struct Ioapic { + Lock; /* register access */ + u32int* addr; /* register base */ + uintmem paddr; /* register base */ + int nrdt; /* size of RDT */ + int ibase; /* global interrupt base */ +}; + +struct Lapic { + int machno; /* APIC */ + + u32int lvt[7]; + int nlvt; + int ver; + + vlong hz; /* APIC Timer frequency */ + vlong max; + vlong min; + vlong div; +}; + +struct Apic { + int useable; /* en */ + Ioapic; + Lapic; +}; + +enum { + Nbus = 256, /* must be 256 */ + Napic = 254, /* xAPIC architectural limit */ + Nrdt = 128, +}; + +/* + * Common bits for + * IOAPIC Redirection Table Entry (RDT); + * APIC Local Vector Table Entry (LVT); + * APIC Interrupt Command Register (ICR). 
+ * [10:8] Message Type + * [11] Destination Mode (RW) + * [12] Delivery Status (RO) + * [13] Interrupt Input Pin Polarity (RW) + * [14] Remote IRR (RO) + * [15] Trigger Mode (RW) + * [16] Interrupt Mask + */ +enum { + MTf = 0x00000000, /* Fixed */ + MTlp = 0x00000100, /* Lowest Priority */ + MTsmi = 0x00000200, /* SMI */ + MTrr = 0x00000300, /* Remote Read */ + MTnmi = 0x00000400, /* NMI */ + MTir = 0x00000500, /* INIT/RESET */ + MTsipi = 0x00000600, /* Startup IPI */ + MTei = 0x00000700, /* ExtINT */ + + Pm = 0x00000000, /* Physical Mode */ + Lm = 0x00000800, /* Logical Mode */ + + Ds = 0x00001000, /* Delivery Status */ + IPhigh = 0x00000000, /* IIPP High */ + IPlow = 0x00002000, /* IIPP Low */ + Rirr = 0x00004000, /* Remote IRR */ + TMedge = 0x00000000, /* Trigger Mode Edge */ + TMlevel = 0x00008000, /* Trigger Mode Level */ + Im = 0x00010000, /* Interrupt Mask */ +}; + +void apictimerenab(void); +int gsitoapicid(int, uint*); +void ioapicdump(void); +Apic* ioapicinit(int, int, uintmem); +void ioapicintrinit(int, int, int, int, int, u32int); +Apic* ioapiclookup(uint); +void ioapiconline(void); +void lapicdump(void); +int lapiceoi(int); +void lapicinit(int, uintmem, int); +void lapicipi(int); +int lapicisr(int); +Apic* lapiclookup(uint); +int lapiconline(void); +void lapicpri(int); +void lapicsipi(int, uintmem); + +int pcimsienable(Pcidev*, uvlong); +int pcimsimask(Pcidev*, int); --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,723 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" +#include "apic.h" + +#define DBG(...) 
// print(__VA_ARGS__)
+
+enum {
+	Intel,
+	Amd,
+};
+
+enum {				/* cpuid standard function codes */
+	Highstdfunc	= 0,
+	Procsig,
+};
+
+typedef struct Arch Arch;
+
+struct Arch {
+	int	vendor;
+	char	vstring[12+1];
+};
+
+Mconf mconf;
+
+static Arch aarch;
+
+/*
+ * spin on the tsc for ms milliseconds' worth of ticks
+ * (cpumhz*1000 per millisecond); ms <= 0 is treated as 1.
+ */
+void
+delay(int ms)
+{
+	u64int r, t;
+
+	if(ms <= 0)
+		ms = 1;
+	r = rdtsc();
+	for(t = r + m->cpumhz*1000ull*ms; r < t; r = rdtsc())
+		;
+}
+
+/*
+ * spin on the tsc for µs microseconds' worth of ticks.
+ * the tick count is computed in 64 bits, as delay() does,
+ * so cpumhz*µs cannot overflow an int on long waits.
+ */
+void
+microdelay(int µs)
+{
+	u64int r, t;
+
+	r = rdtsc();
+	for(t = r + (u64int)m->cpumhz*µs; r < t; r = rdtsc())
+		;
+}
+
+/*
+ * copy the 4 bytes of r to s if they fit below e;
+ * returns the (possibly advanced) position.
+ */
+static char*
+append(char *s, char *e, uint r)
+{
+	if(s+4 <= e){
+		memmove(s, &r, 4);
+		s += 4;
+	}
+	return s;
+}
+
+/*
+ * append the registers returned by cpuid functions
+ * 0x80000002-0x80000004 (16 bytes each) to p, never writing
+ * at or beyond e, and NUL-terminate.
+ * returns a pointer to the terminating NUL.
+ */
+static char*
+brandstring(char *p, char *e)
+{
+	int i;
+	Cpuidreg r;
+
+	for(i = 0; i < 3; i++){
+		memset(&r, 0, sizeof r);
+		r.ax = 0x80000002+i;
+		cpuid(&r);
+		p = append(p, e, r.ax);
+		p = append(p, e, r.bx);
+		p = append(p, e, r.cx);
+		p = append(p, e, r.dx);
+	}
+	if(p == e)
+		p--;
+	*p = 0;
+	return p;
+}
+
+/*
+ * use intel brand string to discover hz.
+ * the number is taken from the 5 characters before the unit
+ * letter, either whole or in the form d.dd (e.g. 3.07GHz).
+ * returns 0 if no usable frequency is found.
+ */
+static vlong
+intelbshz(void)
+{
+	char s[4*4*3+1], *h;
+	uvlong scale;
+
+	brandstring(s, s+sizeof s);
+	DBG("brandstring: %s\n", s);
+
+	h = strstr(s, "Hz");			/* 3.07THz */
+	if(h == nil || h-s < 5)
+		return 0;
+	h[2] = 0;
+
+	scale = 1000;
+	switch(h[-1]){
+	default:
+		return 0;
+	case 'T':
+		scale *= 1000;
+		/* fall through */
+	case 'G':
+		scale *= 1000;
+		/* fall through */
+	case 'M':
+		scale *= 1000;
+	}
+
+	/* get rid of the fractional part */
+	if(h[-4] == '.'){
+		h[-4] = h[-5];
+		h[-5] = ' '; 
+		scale /= 100;
+	}
+	/*
+	 * base 10, not 0: after the shuffle above 0.80GHz reads
+	 * " 080", which base 0 would take as octal and stop at the 8.
+	 */
+	return strtoul(h-5, 0, 10)*scale;
+}
+
+static vlong
+cpuidhz(Cpuidreg *regs)
+{
+	int r;
+	vlong hz;
+	u64int msr;
+
+	if(aarch.vendor == Intel){
+		switch(regs->ax & 0x0fff3ff0){
+		default:
+			hz = intelbshz();
+			break;
+		}
+		DBG("cpuidhz: %#llud hz\n", hz);
+	}
+	else if(aarch.vendor == Amd){
+		switch(regs->ax & 0x0fff0ff0){
+		default:
+			return 0;
+		case 0x00000f50:		/* K8 */
+			msr = rdmsr(0xc0010042);
+			if(msr == 0)
+				return 0;
+			hz = (800 + 200*((msr>>1) & 0x1f)) * 1000000ll;
+			break;
+		case
0x00100f90: /* K10 */ + case 0x00000620: /* QEMU64 */ + msr = rdmsr(0xc0010064); + r = (msr>>6) & 0x07; + hz = (((msr & 0x3f)+0x10)*100000000ll)/(1<cpuhz = cpuidhz(&r); + m->cpumhz = m->cpuhz / 1000000ull; + if(m->cpuhz == 0) + panic("can't determine clock"); +}
+
+/*
+ * read the 12-byte vendor string with cpuid function Highstdfunc
+ * (bx, dx, cx in that order) and record the vendor in aarch;
+ * panics on anything other than AMD or Intel.
+ */
+void
+clockinit(void)
+{
+	char *p, *e;
+	Cpuidreg r;
+
+	/*
+	 * NOTE(review): this call precedes the assignment of aarch.vendor
+	 * below, so any vendor test reached from archinit() on this path
+	 * sees the zero value (Intel); confirm that is intended.
+	 */
+	archinit();
+
+	/* hook to add other stuff */
+	memset(&r, 0, sizeof r);
+	r.ax = Highstdfunc;
+	cpuid(&r);
+	e = aarch.vstring + sizeof aarch.vstring;
+	p = append(aarch.vstring, e, r.bx);
+	p = append(p, e, r.dx);
+	p = append(p, e, r.cx);
+	*p = '\0';
+
+	if(strcmp(aarch.vstring, "AuthenticAMD") == 0)
+		aarch.vendor = Amd;
+	else if(strcmp(aarch.vstring, "GenuineIntel") == 0)
+		aarch.vendor = Intel;
+	else
+		panic("unknown cpu vendor %s", aarch.vstring);
+}
+
+/*
+ * print one line with the clock rate, the vendor string and the
+ * cpuid brand string, followed by a blank line.
+ */
+void
+printcpufreq(void)
+{
+	char buf[128], *p, *e;
+
+	p = buf;
+	e = buf + sizeof buf;
+	p = seprint(p, e, "cpu%d: %dMHz ", 0, m->cpumhz);
+	p = seprint(p, e, "%s ", aarch.vstring);
+	p = brandstring(p, e);
+	seprint(p, e, "\n");
+	/* buf holds text supplied by the processor; never use it as a format */
+	print("%s", buf);
+	print("\n");
+}
+
+void
+cpuidentify(void)
+{
+	wrmsr(0x10, 0);				/* reset tsc */
+
+	vsvminit(MACHSTKSZ);
+	archinit();
+	printcpufreq();
+}
+
+/*
+ * Where configuration info is left for the loaded programme.
+ * This will turn into a structure as more is done by the boot loader
+ * (e.g. why parse the .ini file twice?).
+ * There are 1024 bytes available at CONFADDR.
+ */ +#define CONFADDR ((char*)KADDR(0x1200)) /* info passed from boot loader */ +#define BOOTLINE CONFADDR +#define BOOTLINELEN 64 +#define BOOTARGS ((CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (1024-BOOTLINELEN) +#define MAXCONF 32 + +char bootdisk[NAMELEN]; +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; + +int +getcfields(char* lp, char** fields, int n, char* sep) +{ + int i; + + for(i = 0; lp && *lp && i < n; i++){ + while(*lp && strchr(sep, *lp) != 0) + *lp++ = 0; + if(*lp == 0) + break; + fields[i] = lp; + while(*lp && strchr(sep, *lp) == 0){ + if(*lp == '\\' && *(lp+1) == '\n') + *lp++ = ' '; + lp++; + } + } + + return i; +} + +static void +options(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. + */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getcfields(cp, line, MAXCONF, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == 0) + continue; + *cp++ = 0; + if(cp - line[i] >= NAMELEN+1) + *(line[i]+NAMELEN-1) = 0; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +typedef struct { + u64int base; + u64int lim; + u32int type; +}Emap; + +static char *etypes[] = +{ + "type=0", + "memory", + "reserved", + "acpi reclaim", + "acpi nvs", + "unusable", + "disable", +}; + +#define maxe820 32 + +/* debugging crap */ +uint ne820; +static Emap emap[maxe820]; + +void +cmd_e820(int, char **) +{ + uint n; + Emap *e, *end; + vlong sz, ex; + + print("found %ud e820 entries %ud banks\n", ne820, mconf.nbank); + + e = emap; + end = e+ne820; + + n = 0; + sz = 0; + ex = 0; + for(; ebase, e->lim); + if(e->type < nelem(etypes)) + print("%s\n", etypes[e->type]); + else + print("type=%ud\n", e->type); + + if(e->type != 
1 || e->base == 0) + continue; + sz += e->lim - e->base; + if(++n == MAXBANK) + continue; + + print("\t" "bank %llux %llux\n", e->base, e->lim); + } + print("found %d e820 memory banks %lludMB+%lludMB\n", n, sz/MiB, ex/MiB); +} + +static void +e820(void) +{ + char *s, *f[32*3]; + uint i, n, bank; + Emap *e; + + s = getconf("*e820"); + if(s == nil) + panic("*e820 scan fails"); + n = getfields(s, f, nelem(f), 0, " "); + n -= n%3; + + bank = 0; + for(i = 0; i < n; i += 3){ + e = emap + ne820; + e->type = strtoull(f[i+0], 0, 0); + e->base = strtoull(f[i+1], 0, 0); + e->lim = strtoull(f[i+2], 0, 0); + ne820++; + bank |= e->type == 1 && e->base != 0; + } + if(bank == 0) + panic("*e820 scan fails"); + cmd_install("e820", "-- print e820 scan results", cmd_e820); +} + +static void +addbank(Emap *e) +{ + uintmem base; + Mbank *b; + + if(mconf.nbank == nelem(mconf.bank)){ + print("addbank: lost chunk %#P:%#P; increase MAXBANK\n", e->base, e->lim); + return; + } + base = e->base; + if(mconf.nbank == 0){ + if(e->lim < INIMAP) + panic("need at least %d bytes memory", INIMAP); + b = mconf.bank + mconf.nbank; + b->base = base + PADDR(end); + b->base = ROUNDUP(b->base, BY2PG); + b->limit = INIMAP; + mconf.nbank++; + base = INIMAP; + } + b = mconf.bank + mconf.nbank; + b->base = base; + b->limit = e->lim; + mconf.nbank++; +} + +void +bankinit(void) +{ + uint i; + Emap *e; + + for(i = 0; i < ne820; i++){ + e = emap + i; + if(e->type != 1 || e->base == 0) + continue; + addbank(e); + } +} + +extern void cmd_vec(int, char**); +extern void cmd_machvec(int, char**); + +void +vecinit(void) +{ + options(); + e820(); + bankinit(); + mmuinit(); + pcireset(); + trapinit(); + acpiinit(MACHMAX); +#ifdef MPS + mpsinit(MACHMAX); /* remove this */ +#endif + lapiconline(); + ioapiconline(); + sipi(); + fpuinit(); + + cmd_install("vec", "-- vectors", cmd_vec); /* move to trap.c */ + cmd_install("machvec", "-- vectors cnt", cmd_machvec); /* move to trap.c */ +} + +char* +getconf(char *name) +{ + int 
i;
+
+	for(i = 0; i < nconf; i++)
+		if(cistrcmp(confname[i], name) == 0)
+			return confval[i];
+	return 0;
+}
+
+void
+lockinit(void)
+{
+}
+
+void
+idle(void)
+{
+	hardhalt();
+}
+
+void
+launchinit(void)
+{
+}
+
+void
+lights(int, int)
+{
+}
+
+Float
+famd(Float a, int b, int c, int d)
+{
+	return ((a+b) * c) / d;
+}
+
+ulong
+fdf(Float a, int b)
+{
+	return a / b;
+}
+
+/*
+ * returns the sum of limit-base over the banks recorded in mconf.
+ */
+uintmem
+meminit(void)
+{
+	uint i;
+	uintmem sz;
+
+	sz = 0;
+	for(i = 0; i < mconf.nbank; i++)
+		sz += mconf.bank[i].limit-mconf.bank[i].base;
+	return sz;
+}
+
+/*
+ * record a page size of 1<<bits bytes in the next free slot
+ * of this processor's page-size tables.
+ */
+static void
+addmachpgsz(int bits)
+{
+	int i;
+
+	i = m->npgsz;
+	m->pgszlg2[i] = bits;
+	m->pgszmask[i] = (1<<bits)-1;
+	m->pgsz[i] = 1<<bits;
+	m->npgsz++;
+}
+
+/*
+ * register the page sizes this processor supports: 4KiB and 2MiB
+ * always, 1GiB when cpuid reports it.
+ * returns the number of sizes registered.
+ */
+int
+archmmu(void)
+{
+	Cpuidreg r;
+
+	addmachpgsz(12);
+	addmachpgsz(21);
+
+	/*
+	 * Check the Page1GB bit in function 0x80000001 DX for 1*GiB support.
+	 * (bit 26 of DX; the same bit of CX means something else.)
+	 */
+	r = (Cpuidreg){0x80000001, 0, 0, 0};
+	cpuid(&r);
+	if(r.dx & 0x04000000)
+		addmachpgsz(30);
+
+	return m->npgsz;
+}
+
+void
+userinit(void (*f)(void), void *arg, char *text)
+{
+	User *p;
+
+	p = newproc();
+
+	/*
+	 * Kernel Stack.
+	 * The - sizeof(uintptr) is because the path sched()->gotolabel()->init0()->f()
+	 * uses a stack location without creating any local space.
+	 */
+	p->sched.pc = (uintptr)init0;
+	p->sched.sp = (uintptr)p->stack + sizeof(p->stack) - sizeof(uintptr);
+	p->start = f;
+	p->text = text;
+	p->arg = arg;
+
+	dofilter(&p->time);
+	ready(p);
+}
+
+/*
+ * return the next input character, trying the keyboard,
+ * then cec, then the uart.
+ */
+int
+consgetc(void)
+{
+	int c;
+
+	if(c = kbdgetc())
+		return c;
+	if(c = cecgetc())
+		return c;
+	return uartgetc();
+}
+
+void
+consputs(char* s, int n)
+{
+	cgaputs(s, n);
+	cecputs(s, n);
+	uartputs(s, n);
+}
+
+/* remove this crunchy junk */
+void
+wave(int c)
+{
+	outb(0x3F8+0, c);
+	while((inb(0x3F8+5) & 1<<5) == 0)
+		;
+}
+
+void
+waveprint(char *fmt, ...)
+{ + int i; + static Lock lk; + + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + vseprint(buf, buf+sizeof buf, fmt, arg); + va_end(arg); + + ilock(&lk); + for(i = 0; buf[i] != 0; i++){ + wave(buf[i]); + cgaputc(buf[i]); + microdelay(100); + } + iunlock(&lk); +} + +void +consinit(void) +{ + char *p; + int baud, port; + + kbdinit(); + + if((p = getconf("console")) == 0 || cistrcmp(p, "cga") == 0) + return; + + port = strtoul(p, &p, 0); + if(port < 0 || port > 1) + return; + while(*p == ' ' || *p == '\t') + p++; + if(*p != 'b' || (baud = strtoul(p+1, 0, 0)) == 0) + baud = 9600; + + uartspecial(port, kbdchar, nil, baud); + conf.useuart = 1; +} + +void +consreset(void) +{ +} + +int +pciconfig(char *class, int ctlrno, Pciconf *pci) +{ + char cc[NAMELEN], *p, *q, *r; + int n; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + for(n = 0; n < nconf; n++){ + if(cistrncmp(confname[n], cc, NAMELEN)) + continue; + pci->nopt = 0; + p = confval[n]; + while(*p){ + while(*p == ' ' || *p == '\t') + p++; + if(*p == '\0') + break; + if(cistrncmp(p, "type=", 5) == 0){ + p += 5; + for(q = pci->type; q < &pci->type[NAMELEN-1]; q++){ + if(*p == '\0' || *p == ' ' || *p == '\t') + break; + *q = *p++; + } + *q = '\0'; + } + else if(cistrncmp(p, "port=", 5) == 0) + pci->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + pci->irq = strtoul(p+4, &p, 0); + else if(pci->nopt < Npciopt){ + r = pci->opt[pci->nopt]; + while(*p && *p != ' ' && *p != '\t'){ + *r++ = *p++; + if(r-pci->opt[pci->nopt] >= Pcioptlen-1) + break; + } + *r = '\0'; + pci->nopt++; + } + while(*p && *p != ' ' && *p != '\t') + p++; + } + return 1; + } + return 0; +} + +void +cycles(uvlong *t) +{ + *t = rdtsc(); +} + +void +firmware(void) +{ + char *p; + + /* + * Always called splhi(). 
+ */ + if((p = getconf("reset")) && cistrcmp(p, "manual") == 0){ + predawn = 1; + print("\nHit Reset\n"); + for(;;); + } + pcireset(); + i8042reset(); +} + +void +clockreload(Timet) +{ +} + +void +archmach0init(Mach *m) +{ + m->stack = PTR2UINT(sys->machstk); + m->vsvm = sys->vsvmpage; + + m->online = 1; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,197 @@ +/* forward declarations */ +typedef struct User User; + +typedef struct Cpuidreg Cpuidreg; +typedef struct Filter Filter; +typedef ulong Float; +typedef struct Label Label; +typedef struct Lock Lock; +typedef struct MACH MACH; +typedef struct Mach Mach; +typedef struct Mbank Mbank; +typedef struct Mconf Mconf; +typedef struct MMMU MMMU; +typedef u64int Mpl; +typedef struct Page Page; +typedef struct Pcidev Pcidev; +typedef struct Perf Perf; +typedef u64int PTE; +typedef struct Sys Sys; +typedef u64int uintmem; + +/* fully declared elsewhere */ +#pragma incomplete Page +#pragma incomplete Pcidev + +#pragma varargck type "P" uintmem +#pragma varargck type "τ" int + + +enum { + NPGSZ = 4, +}; + +struct Lock +{ + u32int* sbsem; /* addr of sync bus semaphore */ + uintptr pc; + Mpl sr; + + Mach *m; + User *p; + char isilock; +}; + +enum { + MAXBANK = 8, +}; + +struct Mbank { + uintptr base; + uintptr limit; +}; + +struct Mconf { + Lock; + Mbank bank[MAXBANK]; + int nbank; +}; + +/* + * MMU stuff in Mach. + */ +struct MMMU +{ + Page* pml4; /* pml4 for this processor */ + PTE* pmap; /* unused as of yet */ + + uint pgszlg2[NPGSZ]; /* per Mach or per Sys? 
*/ + uintmem pgszmask[NPGSZ]; + uint pgsz[NPGSZ]; + int npgsz; + + uchar pml4kludge[128]; +// Page pml4kludge; /* NIX KLUDGE: we need a page */ +}; + +/* + * performance timers, all units in perfticks + */ +struct Perf +{ + u64int intrts; /* time of last interrupt */ + u64int inintr; /* time since last clock tick in interrupt handlers */ + u64int avg_inintr; /* avg time per clock tick in interrupt handlers */ + u64int inidle; /* time since last clock tick in idle loop */ + u64int avg_inidle; /* avg time per clock tick in idle loop */ + u64int last; /* value of perfticks() at last clock tick */ + u64int period; /* perfticks() per clock tick */ +}; + +struct MACH +{ + uintptr splpc; /* known to assembly as 8(RMACH) */ + uintptr stack; + uchar* vsvm; + void* gdt; + void* tss; + + Lock apictimerlock; + int apicno; + int online; + + Perf perf; + + u64int rdtsc; +}; + +struct Filter +{ + ulong count; /* count and old count kept separate */ + ulong oldcount; /* so interrput can read them */ + Float filter[3]; /* filter */ +}; + +struct Label +{ + uintptr sp; + uintptr pc; +}; + +struct Mach +{ + int machno; /* physical id of processor */ + MACH; /* locations known to assmbly */ + int lights; /* light lights, this processor */ + Filter idle; + + User* proc; /* current process on this processor */ + Label sched; /* scheduler wakeup */ + + User* intrp; /* process that was interrupted */ + + int lastintr; + int spuriousintr; + + MMMU; + + int cpumhz; + uvlong cpuhz; +}; + +struct Cpuidreg { + u32int ax; + u32int bx; + u32int cx; + u32int dx; +}; + +/* + * This is the low memory map, between 0x100000 and 0x110000. + * It is located there to allow fundamental datastructures to be + * created and used before knowing where free memory begins + * (e.g. there may be modules located after the kernel BSS end). + * The layout is known in the bootstrap code in l32p.s. 
+ * It is logically two parts: the per processor data structures + * for the bootstrap processor (stack, Mach, vsvm, and page tables), + * and the global information about the system (syspage, ptrpage). + * Some of the elements must be aligned on page boundaries, hence + * the unions. + */ +struct Sys { + uchar machstk[MACHSTKSZ]; + + PTE pml4[PTSZ/sizeof(PTE)]; /* */ + PTE pdp[PTSZ/sizeof(PTE)]; + PTE pd[PTSZ/sizeof(PTE)]; + PTE pt[PTSZ/sizeof(PTE)]; + + uchar vsvmpage[4*KiB]; + + union { + Mach mach; + uchar machpage[MACHSZ]; + }; + union { + struct { + long ticks; /* convert to u64int for tsc? */ + u64int epoch; /* tsc synchronization */ + }; + uchar syspage[4*KiB]; + }; + union { + Mach* machptr[MACHMAX]; + uchar ptrpage[4*KiB]; + }; + uchar pad[2][4096]; +}; + +extern Sys* sys; +extern Mach mach0; +extern Mconf mconf; +extern char nvrfile[128]; +extern register Mach* m; +extern register User* u; + +#define MACHP(n) sys->machptr[n] +#define Ticks sys->ticks --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,98 @@ +#include "all.h" + +enum { + Width = 160, + Height = 25, + + Attr = 7, /* white on black */ +}; + +#define BASE ((uchar*)(KZERO+0xB8000)) + +static int pos; +static int screeninitdone; +static Lock screenlock; + +static uchar +cgaregr(int index) +{ + outb(0x3D4, index); + return inb(0x3D4+1) & 0xFF; +} + +static void +cgaregw(int index, int data) +{ + outb(0x3D4, index); + outb(0x3D4+1, data); +} + +static void +movecursor(void) +{ + cgaregw(0x0E, (pos/2>>8) & 0xFF); + cgaregw(0x0F, pos/2 & 0xFF); + BASE[pos+1] = Attr; +} + +static void +cgascreenputc(int c) +{ + int i; + + if(c == '\n'){ + pos = pos/Width; + pos = (pos+1)*Width; + } + else if(c == '\t'){ + i = 4 - ((pos/2)&3); + while(i-->0) + cgascreenputc(' '); + } + else if(c == '\b'){ + if(pos >= 2) + pos -= 2; + cgascreenputc(' '); + pos -= 2; + } + else{ + BASE[pos++] = c; + BASE[pos++] = Attr; + } + if(pos >= Width*Height){ + memmove(BASE, 
&BASE[Width], Width*(Height-1)); + memset(&BASE[Width*(Height-1)], 0, Width); + pos = Width*(Height-1); + } + movecursor(); +} + +static void +screeninit(void) +{ + lock(&screenlock); + if(screeninitdone == 0){ + pos = cgaregr(0x0E)<<8; + pos |= cgaregr(0x0F); + pos *= 2; + screeninitdone = 1; + } + unlock(&screenlock); +} + +void +cgaputs(char* s, int n) +{ + if(screeninitdone == 0) + screeninit(); + while(n-- > 0) + cgascreenputc(*s++); +} + +void +cgaputc(int c) +{ + if(screeninitdone == 0) + screeninit(); + cgascreenputc(c); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,843 @@ +#include "all.h" +#include "io.h" + +#include "dosfs.h" + +#define chat(...) //print(__VA_ARGS__) + +/* + * block io buffers + */ +typedef struct Clustbuf Clustbuf; + +struct Clustbuf +{ + int flags; + int age; + Devsize sector; + uchar * iobuf; + Dos * dos; + int size; + int bufsize; +}; + +enum +{ + Nbio= 16, + LOCKED= 1, + MOD= 2, + IMMED= 4, +}; + +static void puttime(Dosdir*); + +static Clustbuf bio[Nbio]; + +/* + * write an io buffer and update its flags + */ +static void +writeclust(Clustbuf *p) +{ + Dos *dos; + Off addr; + + dos = p->dos; + addr = (p->sector+dos->start)*dos->sectbytes; + chat("writeclust @ %lld addr %lld...", (Wideoff)p->sector, + (Wideoff)addr); + if(dos->write(dos->dev, p->iobuf, p->size, addr) != p->size) + panic("writeclust: write"); + p->flags &= ~(MOD|IMMED); + chat("OK\n"); +} + +/* + * write any dirty buffers + */ +static void +syncclust(void) +{ + Clustbuf *p; + + for(p = bio; p < &bio[Nbio]; p++){ + if(p->flags & LOCKED) + panic("syncclust"); + if(p->flags & MOD) + writeclust(p); + } +} + +/* + * get an io buffer, possibly with valid data + */ +static Clustbuf* +getclust0(Dos *dos, Off sector) +{ + Clustbuf *p, *oldest; + + chat("getclust0 @ %lld\n", (Wideoff)sector); + + /* + * if we have it, just return it + * otherwise, reuse the oldest unlocked entry + */ + oldest = 0; + for(p = bio; p < 
&bio[Nbio]; p++){
+		if(sector == p->sector && dos == p->dos){
+			if(p->flags & LOCKED)
+				panic("getclust0 locked");
+			chat("getclust0 %lld in cache\n", (Wideoff)sector);
+			p->flags |= LOCKED;
+			return p;
+		}
+		if(p->flags & LOCKED)
+			continue;
+		if(oldest == 0 || p->age <= oldest->age)
+			oldest = p;
+	}
+	p = oldest;
+	if(p == 0)
+		panic("getclust0 all locked");
+	p->flags |= LOCKED;
+	if(p->flags & MOD)
+		writeclust(p);
+
+	/*
+	 * make sure the buffer is big enough
+	 */
+	if(p->iobuf==0 || p->bufsize < dos->clustbytes){
+		p->bufsize = dos->clustbytes;
+		p->iobuf = ialloc(p->bufsize, 0);
+	}
+	if(sector >= dos->dataaddr)
+		p->size = dos->clustbytes;
+	else
+		p->size = dos->sectbytes;
+	p->dos = 0;	/* make it invalid */
+	return p;
+}
+
+/*
+ * get an io block from an io buffer.
+ * returns the locked buffer holding the block, read from
+ * the device unless it was already cached, or 0 if the read
+ * fails.  release the buffer with putclust().
+ */
+static Clustbuf*
+getclust(Dos *dos, Off sector)
+{
+	Clustbuf *p;
+	Off addr;
+
+	p = getclust0(dos, sector);
+	if(p->dos){
+		p->age = Ticks;
+		return p;
+	}
+	addr = (sector+dos->start)*dos->sectbytes;
+	chat("getclust read addr %lld\n", (Wideoff)addr);
+	if(dos->read(dos->dev, p->iobuf, p->size, addr) != p->size){
+		chat("can't read block\n");
+		/*
+		 * the caller gets no buffer to putclust(), so give the
+		 * (still invalid) buffer back here; otherwise it stays
+		 * locked for good.
+		 */
+		p->flags &= ~LOCKED;
+		return 0;
+	}
+
+	p->age = Ticks;
+	p->dos = dos;
+	p->sector = sector;
+	chat("getclust %lld read\n", (Wideoff)sector);
+	return p;
+}
+
+/*
+ * get an io block from an io buffer;
+ * any current data is discarded.
+ */
+static Clustbuf*
+getclustz(Dos *dos, Off sector)
+{
+	Clustbuf *p;
+
+	p = getclust0(dos, sector);
+	p->age = Ticks;
+	p->dos = dos;
+	p->sector = sector;
+	memset(p->iobuf, 0, p->size);
+	p->flags |= MOD;
+	chat("getclustz %lld\n", (Wideoff)sector);
+	return p;
+}
+
+/*
+ * release an io buffer
+ */
+static void
+putclust(Clustbuf *p)
+{
+	if(!(p->flags & LOCKED))
+		panic("putclust lock");
+	if((p->flags & (MOD|IMMED)) == (MOD|IMMED))
+		writeclust(p);
+	p->flags &= ~LOCKED;
+	chat("putclust @ sector %lld...", (Wideoff)p->sector);
+}
+
+/*
+ * walk the fat one level ( n is a current cluster number ).
+ * return the new cluster number or -1 if no more.
+ * a fat sector that cannot be read is fatal, as in fatwrite().
+ */
+static long
+fatwalk(Dos *dos, int n)
+{
+	uint k, sect;
+	Clustbuf *p;
+	int o;
+
+	chat("fatwalk %d\n", n);
+
+	if(n < 2 || n >= dos->fatclusters)
+		return -1;
+
+	switch(dos->fatbits){
+	case 12:
+		k = (3*n)/2; break;
+	case 16:
+		k = 2*n; break;
+	default:
+		return -1;
+	}
+	if(k >= dos->fatbytes)
+		panic("getfat");
+
+	sect = k/dos->sectbytes + dos->fataddr;
+	o = k%dos->sectbytes;
+	p = getclust(dos, sect);
+	if(p == 0)
+		panic("fatwalk getclust");
+	k = p->iobuf[o++];
+	/* the second byte of the entry may be in the next sector */
+	if(o >= dos->sectbytes){
+		putclust(p);
+		p = getclust(dos, sect+1);
+		if(p == 0)
+			panic("fatwalk getclust");
+		o = 0;
+	}
+	k |= p->iobuf[o]<<8;
+	putclust(p);
+	if(dos->fatbits == 12){
+		if(n&1)
+			k >>= 4;
+		else
+			k &= 0xfff;
+		if(k >= 0xff8)
+			k |= 0xf000;
+	}
+	k = k < 0xfff8 ? k : -1;
+	chat("fatwalk %d -> %lud\n", n, k);
+	return k;
+}
+
+/*
+ * write a value into each copy of the fat.
+ */
+static void
+fatwrite(Dos *dos, int n, int val)
+{
+	Off k, sect;
+	Clustbuf *p;
+	int i, o;
+
+	chat("fatwrite %d %d...", n, val);
+
+	if(n < 2 || n >= dos->fatclusters)
+		panic("fatwrite n");
+
+	switch(dos->fatbits){
+	case 12:
+		k = (3*n)/2; break;
+	case 16:
+		k = 2*n; break;
+	default:
+		panic("fatwrite fatbits");
+		return;
+	}
+	if(k >= dos->fatbytes)
+		panic("fatwrite k");
+
+	for(i=0; i<dos->nfats; i++, k+=dos->fatbytes){
+		sect = k/dos->sectbytes + dos->fataddr;
+		o = k%dos->sectbytes;
+		p = getclust(dos, sect);
+		if(p == 0)
+			panic("fatwrite getclust");
+		switch(dos->fatbits){
+		case 12:
+			if(n&1){
+				p->iobuf[o] &= 0x0f;
+				p->iobuf[o++] |= val<<4;
+			}else
+				p->iobuf[o++] = val;
+			if(o >= dos->sectbytes){
+				p->flags |= MOD;
+				putclust(p);
+				p = getclust(dos, sect+1);
+				if(p == 0)
+					panic("fatwrite getclust");
+				o = 0;
+			}
+			if(n&1)
+				p->iobuf[o] = val>>4;
+			else{
+				p->iobuf[o] &= 0xf0;
+				p->iobuf[o] |= (val>>8)&0x0f;
+			}
+			break;
+		case 16:
+			p->iobuf[o++] = val;
+			p->iobuf[o] = val>>8;
+			break;
+		}
+		p->flags |= MOD;
+		putclust(p);
+	}
+	chat("OK\n");
+}
+
+/*
+ * allocate a free cluster from the fat.
+ */
+static int
+fatalloc(Dos *dos)
+{
+	Clustbuf *p;
+	int n;
+
+	/* scan from freeptr, wrapping to 2, for an entry fatwalk reports as 0 */
+	n = dos->freeptr;
+	for(;;){
+		if(fatwalk(dos, n) == 0)
+			break;
+		if(++n >= dos->fatclusters)
+			n = 2;
+		if(n == dos->freeptr)
+			return -1;	/* came full circle: nothing free */
+	}
+	dos->freeptr = n+1;
+	if(dos->freeptr >= dos->fatclusters)
+		dos->freeptr = 2;
+	/* mark the cluster as end of chain and zero its data */
+	fatwrite(dos, n, 0xffff);
+	p = getclustz(dos, dos->dataaddr + (n-2)*dos->clustsize);
+	putclust(p);
+	return n;
+}
+
+/*
+ * map a file's logical sector address to a physical sector address
+ * returns -1 if ltarget lies beyond the file.  when pdir (the locked
+ * buffer holding the file's directory entry) is non-zero, missing
+ * clusters are allocated instead, and -1 then means allocation failed.
+ */
+static long
+fileaddr(Dosfile *fp, Off ltarget, Clustbuf *pdir)
+{
+	Dos *dos = fp->dos;
+	Dosdir *dp;
+	Off p;
+
+	chat("fileaddr %8.8s %lld\n", fp->name, (Wideoff)ltarget);
+	/*
+	 * root directory is contiguous and easy
+	 */
+	if(fp->pdir == 0){
+		if(ltarget*dos->sectbytes >= dos->rootsize*sizeof(Dosdir))
+			return -1;
+		p = dos->rootaddr + ltarget;
+		chat("fileaddr %lld -> %lld\n", (Wideoff)ltarget, (Wideoff)p);
+		return p;
+	}
+	if(fp->pstart == 0){	/* empty file */
+		if(!pdir)
+			return -1;
+		p = fatalloc(dos);
+		if(p <= 0)
+			return -1;
+		chat("fileaddr initial alloc %lld\n", (Wideoff)p);
+		/* record the first cluster in the directory entry */
+		dp = (Dosdir *)(pdir->iobuf + fp->odir);
+		puttime(dp);
+		dp->start[0] = p;
+		dp->start[1] = p>>8;
+		pdir->flags |= MOD;
+		fp->pstart = p;
+		fp->pcurrent = p;
+		fp->lcurrent = 0;
+	}
+	/*
+	 * anything else requires a walk through the fat
+	 * [lp]current will point to the last cluster if we run off the end
+	 */
+	ltarget /= dos->clustsize;
+	if(fp->pcurrent == 0 || fp->lcurrent > ltarget){
+		/* go back to the beginning */
+		fp->lcurrent = 0;
+		fp->pcurrent = fp->pstart;
+	}
+	while(fp->lcurrent < ltarget){
+		/* walk the fat */
+		p = fatwalk(dos, fp->pcurrent);
+		if(p < 0){
+			if(!pdir)
+				return -1;
+			/* extend the chain by one cluster */
+			p = fatalloc(dos);
+			if(p < 0){
+				print("file system full\n");
+				return -1;
+			}
+			fatwrite(dos, fp->pcurrent, p);
+		}
+		fp->pcurrent = p;
+		++fp->lcurrent;
+	}
+
+	/*
+	 * clusters start at 2 instead of 0 (why?
- presotto)
+	 */
+	p = dos->dataaddr + (fp->pcurrent-2)*dos->clustsize;
+	chat("fileaddr %lld -> %lld\n", (Wideoff)ltarget, (Wideoff)p);
+	return p;
+}
+
+/*
+ * set up a dos file name
+ * name (8 bytes) and ext (3 bytes) are blank padded, not
+ * NUL terminated; a-z are folded to upper case.
+ */
+static void
+setname(char *name, char *ext, char *from)
+{
+	char *to;
+
+	memset(name, ' ', 8);
+	memset(ext, ' ', 3);
+
+	to = name;
+	for(; *from && to-name < 8; from++, to++){
+		if(*from == '.'){
+			from++;
+			break;
+		}
+		if(*from >= 'a' && *from <= 'z')
+			*to = *from + 'A' - 'a';
+		else
+			*to = *from;
+	}
+
+	/*
+	 * from might be 12345678.123: the loop above stops on the
+	 * length test before it sees the '.', so skip it here
+	 * rather than storing it in ext.
+	 */
+	if(*from == '.')
+		from++;
+
+	to = ext;
+	for(; *from && to-ext < 3; from++, to++){
+		if(*from >= 'a' && *from <= 'z')
+			*to = *from + 'A' - 'a';
+		else
+			*to = *from;
+	}
+
+	chat("name is %8.8s %3.3s\n", name, ext);
+}
+
+/*
+ * walk a directory returns
+ *	-1 if something went wrong
+ *	 0 if not found
+ *	 1 if found
+ * on success fp is rewritten to describe the entry found.
+ */
+static int
+doswalk(Dosfile *fp, char *name)
+{
+	char dname[8], dext[3];
+	Clustbuf *p;
+	Dosdir *dp;
+	Off o, addr;
+
+	chat("walk(%s)\n", name);
+	if((fp->attr & DOSDIR) == 0){
+		chat("walking non-directory!\n");
+		return -1;
+	}
+
+	setname(dname, dext, name);
+
+	fp->offset = 0;	/* start at the beginning */
+	for(;;){
+		addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, 0);
+		if(addr < 0)
+			return 0;
+		p = getclust(fp->dos, addr);
+		if(p == 0)
+			return -1;
+		/* compare every directory entry held in this buffer */
+		for(o=0; o<p->size; o += sizeof(Dosdir)){
+			dp = (Dosdir *)(p->iobuf + o);
+			chat("comparing to %8.8s.%3.3s\n", (char*)dp->name, (char*)dp->ext);
+			if(memcmp(dname, dp->name, sizeof(dp->name)) != 0)
+				continue;
+			if(memcmp(dext, dp->ext, sizeof(dp->ext)) == 0)
+				goto Found;
+		}
+		fp->offset += p->size;
+		putclust(p);
+	}
+
+Found:
+	/* copy what we need out of the entry before unlocking its buffer */
+	fp->pdir = p->sector;
+	fp->odir = o;
+	memmove(fp->name, dname, sizeof(fp->name));
+	memmove(fp->ext, dext, sizeof(fp->ext));
+	fp->attr = dp->attr;
+	fp->length = GLONG(dp->length);
+	fp->pstart = GSHORT(dp->start);
+	putclust(p);
+	fp->pcurrent = 0;
+	fp->lcurrent = 0;
+	fp->offset = 0;
+	return 1;
+}
+
+/*
+ * print the boot block fields through chat
+ */
+static void
+bootdump(Dosboot *b)
+{
+	USED(b);
+	chat("magic: 0x%2.2x 0x%2.2x 0x%2.2x\n",
+		b->magic[0], b->magic[1], b->magic[2]);
+	chat("version: \"%8.8s\"\n", (char*)b->version);
+	chat("sectbytes: %d\n", GSHORT(b->sectbytes));
+	chat("allocsize: %d\n", b->clustsize);
+	chat("nresrv: %d\n", GSHORT(b->nresrv));
+	chat("nfats: %d\n", b->nfats);
+	chat("rootsize: %d\n", GSHORT(b->rootsize));
+	chat("volsize: %d\n", GSHORT(b->volsize));
+	chat("mediadesc: 0x%2.2x\n", b->mediadesc);
+	chat("fatsize: %d\n", GSHORT(b->fatsize));
+	chat("trksize: %d\n", GSHORT(b->trksize));
+	chat("nheads: %d\n", GSHORT(b->nheads));
+	chat("nhidden: %d\n", GLONG(b->nhidden));
+	chat("bigvolsize: %d\n", GLONG(b->bigvolsize));
+	chat("driveno: %d\n", b->driveno);
+	chat("reserved0: 0x%2.2x\n", b->reserved0);
+	chat("bootsig: 0x%2.2x\n", b->bootsig);
+	chat("volid: 0x%8.8x\n", GLONG(b->volid));
+	chat("label: \"%11.11s\"\n", (char*)b->label);
+}
+
+/*
+ * instructions that boot blocks can start with
+ */
+#define	JMPSHORT	0xeb
+#define JMPNEAR		0xe9
+
+/*
+ * read dos file system properties
+ * fills in *dos from the boot block read through dos->read.
+ * returns 0 on success, -1 if no boot block or dos file system
+ * is found.
+ */
+int
+dosinit(Dos *dos)
+{
+	Clustbuf *p;
+	Dospart *dp;
+	Dosboot *b;
+	int i;
+
+	chat("dosinit()\n");
+	/* defaults till we know better */
+	dos->start = 0;
+	dos->sectbytes = 512;
+	dos->clustsize = 1;
+	dos->clustbytes = 512;
+
+	/* get first sector */
+	p = getclust(dos, 0);
+	if(p == 0){
+		chat("can't read boot block\n");
+		return -1;
+	}
+	/* keep this buffer from being taken for cached sector 0 later */
+	p->dos = 0;
+
+	/* if a hard disk format, look for an active partition */
+	b = (Dosboot *)p->iobuf;
+	if(b->magic[0] != JMPNEAR && (b->magic[0] != JMPSHORT || b->magic[2] != 0x90)){
+		/* is the 0x55 in error here?
*/
+		if(p->iobuf[0x1fe] != 0x55 || p->iobuf[0x1ff] != 0xaa){
+			print("no dos file system or partition table\n");
+			putclust(p);
+			return -1;
+		}
+		/* take the first partition table entry flagged active */
+		dp = (Dospart*)&p->iobuf[0x1be];
+		for(i = 0; i < 4; i++, dp++)
+			if(dp->type && dp->flag == 0x80)
+				break;
+		if(i == 4){
+			putclust(p);
+			return -1;
+		}
+		dos->start += GLONG(dp->start);
+		putclust(p);
+		p = getclust(dos, 0);
+		if(p == 0){
+			/* nothing is locked when getclust fails: no putclust */
+			chat("can't read boot block\n");
+			return -1;
+		}
+		p->dos = 0;
+	}
+
+	b = (Dosboot *)p->iobuf;
+	if(b->magic[0] != JMPNEAR && (b->magic[0] != JMPSHORT || b->magic[2] != 0x90)){
+		print("no dos file system\n");
+		putclust(p);
+		return -1;
+	}
+
+	bootdump(b);/**/
+
+	/*
+	 * a zero sector or cluster size would be used as a divisor
+	 * below and in every later fat and file address computation.
+	 */
+	if(GSHORT(b->sectbytes) == 0 || b->clustsize == 0){
+		print("no dos file system\n");
+		putclust(p);
+		return -1;
+	}
+
+	/*
+	 * determine the systems' wonderous properties
+	 */
+	dos->sectbytes = GSHORT(b->sectbytes);
+	dos->clustsize = b->clustsize;
+	dos->clustbytes = dos->sectbytes*dos->clustsize;
+	dos->nresrv = GSHORT(b->nresrv);
+	dos->nfats = b->nfats;
+	dos->rootsize = GSHORT(b->rootsize);
+	dos->volsize = GSHORT(b->volsize);
+	if(dos->volsize == 0)
+		dos->volsize = GLONG(b->bigvolsize);
+	dos->mediadesc = b->mediadesc;
+	dos->fatsize = GSHORT(b->fatsize);
+	dos->fatbytes = dos->sectbytes*dos->fatsize;
+	dos->fataddr = dos->nresrv;
+	dos->rootaddr = dos->fataddr + dos->nfats*dos->fatsize;
+	/* sectors taken by the root directory, rounded up */
+	i = dos->rootsize*sizeof(Dosdir) + dos->sectbytes - 1;
+	i = i/dos->sectbytes;
+	dos->dataaddr = dos->rootaddr + i;
+	dos->fatclusters = 2+(dos->volsize - dos->dataaddr)/dos->clustsize;
+	if(dos->fatclusters < 4087)
+		dos->fatbits = 12;
+	else
+		dos->fatbits = 16;
+	dos->freeptr = 2;
+	putclust(p);
+
+	/*
+	 * set up the root
+	 * (name and ext are blank padded: the literals must
+	 * supply all 8 and 3 bytes that are copied)
+	 */
+	dos->root.dos = dos;
+	dos->root.pdir = 0;
+	dos->root.odir = 0;
+	memmove(dos->root.name, "        ", 8);
+	memmove(dos->root.ext, "   ", 3);
+	dos->root.attr = DOSDIR;
+	dos->root.length = dos->rootsize*sizeof(Dosdir);
+	dos->root.pstart = 0;
+	dos->root.lcurrent = 0;
+	dos->root.pcurrent = 0;
+	dos->root.offset = 0;
+
+	syncclust();
+	return 0;
+}
+
+static char *
+nextelem(char *path, char *elem)
+{
+	int
i;
+
+	/*
+	 * copy the next path component ('/' or ' ' ends it) into
+	 * elem, which must hold NAMELEN bytes including the NUL.
+	 * returns the rest of the path, or 0 at the end of the path
+	 * or if the component does not fit.
+	 */
+	while(*path == '/')
+		path++;
+	if(*path==0 || *path==' ')
+		return 0;
+	for(i=0; *path && *path != '/' && *path != ' '; i++){
+		/* keep the last byte of elem for the terminating NUL */
+		if(i >= NAMELEN-1){
+			print("name component too long\n");
+			return 0;
+		}
+		*elem++ = *path++;
+	}
+	*elem = 0;
+	return path;
+}
+
+/*
+ * stamp a directory entry with the current rtc time and date,
+ * packed as hour<<11|min<<5|sec/2 and (year-80)<<9|(mon+1)<<5|mday.
+ * presumably rtc.year counts from 1900 and rtc.mon from 0;
+ * verify against sec2rtc.
+ */
+static void
+puttime(Dosdir *d)
+{
+	Timet secs;
+	Rtc rtc;
+	ushort x;
+
+	secs = rtctime();
+	sec2rtc(secs, &rtc);
+	x = (rtc.hour<<11) | (rtc.min<<5) | (rtc.sec>>1);
+	d->time[0] = x;
+	d->time[1] = x>>8;
+	x = ((rtc.year-80)<<9) | ((rtc.mon+1)<<5) | rtc.mday;
+	d->date[0] = x;
+	d->date[1] = x>>8;
+}
+
+/*
+ * walk path from the root of dos, one element at a time,
+ * leaving the result in *fp.  returns fp, or 0 if an element
+ * is missing or the walk fails.
+ */
+Dosfile*
+dosopen(Dos *dos, char *path, Dosfile *fp)
+{
+	char element[NAMELEN];
+
+	chat("dosopen(%s)\n", path);
+	*fp = dos->root;
+	while(path = nextelem(path, element)){
+		switch(doswalk(fp, element)){
+		case -1:
+			print("error walking to %s\n", element);
+			return 0;
+		case 0:
+			print("%s not found\n", element);
+			return 0;
+		case 1:
+			print("found %s attr 0x%ux start 0x%llux len %lld\n",
+				element, fp->attr, (Wideoff)fp->pstart,
+				(Wideoff)fp->length);
+			break;
+		}
+	}
+
+	syncclust();
+	return fp;
+}
+
+/*
+ * read from a dos file
+ * reads up to n bytes at fp->offset into a and advances the offset.
+ * returns the number of bytes read, 0 at end of file, -1 on error.
+ */
+long
+dosread(Dosfile *fp, void *a, long n)
+{
+	Off addr, k, o;
+	Clustbuf *p;
+	uchar *to;
+
+	chat("dosread(,,%ld)\n", n);
+	if((fp->attr & DOSDIR) == 0){
+		/* plain files are bounded by their recorded length */
+		if(fp->offset >= fp->length)
+			return 0;
+		if(fp->offset+n > fp->length)
+			n = fp->length - fp->offset;
+	}
+	to = a;
+	while(n > 0){
+		/*
+		 * read the data; sectors below dos->dataaddr
+		 * are read one at a time.
+		 */
+		addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, 0);
+		if(addr < 0)
+			return -1;
+		p = getclust(fp->dos, addr);
+		if(p == 0)
+			return -1;
+		/*
+		 * copy the bytes we need
+		 */
+		o = fp->offset % p->size;
+		k = p->size - o;
+		if(k > n)
+			k = n;
+		memmove(to, p->iobuf+o, k);
+		putclust(p);
+		to += k;
+		fp->offset += k;
+		n -= k;
+	}
+	syncclust();
+	return to - (uchar *)a;
+}
+
+/*
+ * write to a dos file
+ * writes n bytes from a at fp->offset, allocating clusters as
+ * needed, then updates the time stamp and length in the directory
+ * entry.  returns the number of bytes written, or -1 on error;
+ * the directory buffer is released on every return.
+ */
+long
+doswrite(Dosfile *fp, void *a, long n)
+{
+	Off blksize, addr, k, o;
+	Clustbuf *p, *pdir;
+	Dosdir *dp;
+	uchar *from;
+
+	if(fp->attr & DOSDIR){
+		print("write dir\n");
+		return -1;
+	}
+	if(fp->pdir){
+		pdir = getclust(fp->dos, fp->pdir);
+		/*
+		 * should do consistency check if
+		 * concurrent access is possible.
+		 */
+		if(pdir == 0)
+			panic("doswrite");
+	}else
+		pdir = 0;
+	blksize = pdir ? fp->dos->clustbytes : fp->dos->sectbytes;
+	from = a;
+	while(n > 0){
+		addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, pdir);
+		if(addr < 0){
+			/* don't leave the directory buffer locked */
+			if(pdir)
+				putclust(pdir);
+			return -1;
+		}
+		o = fp->offset % blksize;
+		/* a whole block is overwritten: no need to read it first */
+		if(o == 0 && n >= blksize)
+			p = getclustz(fp->dos, addr);
+		else
+			p = getclust(fp->dos, addr);
+		if(p == 0){
+			if(pdir)
+				putclust(pdir);
+			return -1;
+		}
+		/*
+		 * copy the bytes we need
+		 */
+		k = p->size - o;
+		if(k > n)
+			k = n;
+		memmove(p->iobuf+o, from, k);
+		p->flags |= MOD;
+		putclust(p);
+		from += k;
+		fp->offset += k;
+		n -= k;
+	}
+	if(pdir){
+		dp = (Dosdir *)(pdir->iobuf + fp->odir);
+		puttime(dp);
+		if(fp->offset > fp->length){
+			fp->length = fp->offset;
+			dp->length[0] = fp->length;
+			dp->length[1] = fp->length>>8;
+			dp->length[2] = fp->length>>16;
+			dp->length[3] = fp->length>>24;
+		}
+		pdir->flags |= MOD;
+		putclust(pdir);
+	}
+	syncclust();
+	return from - (uchar *)a;
+}
+
+/*
+ * truncate a dos file to zero length
+ */
+int
+dostrunc(Dosfile *fp)
+{
+	Clustbuf *pdir;
+	Dosdir *dp;
+	Off p, np;
+
+	if(fp->attr & DOSDIR){
+		print("trunc dir\n");
+		return -1;
+	}
+	/*
+	 * a file with no first cluster owns no fat chain; passing
+	 * cluster 0 to fatwrite below would panic.
+	 */
+	if(fp->pstart == 0)
+		return 0;
+	pdir = getclust(fp->dos, fp->pdir);
+	if(pdir == 0)
+		panic("dostrunc");
+	p = fatwalk(fp->dos, fp->pstart);
+
fatwrite(fp->dos, fp->pstart, 0xffff); + while(p >= 0){ + np = fatwalk(fp->dos, p); + fatwrite(fp->dos, p, 0); + p = np; + } + fp->length = 0; + dp = (Dosdir *)(pdir->iobuf + fp->odir); + puttime(dp); + dp->length[0] = 0; + dp->length[1] = 0; + dp->length[2] = 0; + dp->length[3] = 0; + pdir->flags |= MOD; + putclust(pdir); + syncclust(); + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,126 @@ +typedef struct Dosboot Dosboot; +typedef struct Dos Dos; +typedef struct Dosdir Dosdir; +typedef struct Dosfile Dosfile; +typedef struct Dospart Dospart; + +struct Dospart +{ + uchar flag; /* active flag */ + uchar shead; /* starting head */ + uchar scs[2]; /* starting cylinder/sector */ + uchar type; /* partition type */ + uchar ehead; /* ending head */ + uchar ecs[2]; /* ending cylinder/sector */ + uchar start[4]; /* starting sector */ + uchar len[4]; /* length in sectors */ +}; + +struct Dosboot{ + uchar magic[3]; + uchar version[8]; + uchar sectbytes[2]; + uchar clustsize; + uchar nresrv[2]; + uchar nfats; + uchar rootsize[2]; + uchar volsize[2]; + uchar mediadesc; + uchar fatsize[2]; + uchar trksize[2]; + uchar nheads[2]; + uchar nhidden[4]; + uchar bigvolsize[4]; + uchar driveno; + uchar reserved0; + uchar bootsig; + uchar volid[4]; + uchar label[11]; + uchar reserved1[8]; +}; + +struct Dosfile{ + Dos * dos; /* owning dos file system */ + int pdir; /* sector containing directory entry */ + int odir; /* offset to same */ + char name[8]; + char ext[3]; + uchar attr; + Devsize length; + Devsize pstart; /* physical start cluster address */ + Devsize pcurrent; /* physical current cluster address */ + Devsize lcurrent; /* logical current cluster address */ + Devsize offset; +}; + +struct Dos{ + int dev; /* device id */ + Off (*read)(int, void*, long, Devsize); /* read routine */ + Off (*write)(int, void*, long, Devsize); /* write routine */ + + uvlong start; /* start of file system (sector no.) 
*/ + int sectbytes; /* size of a sector */ + int clustsize; /* size of a cluster (in sectors) */ + int clustbytes; /* size of a cluster (in bytes) */ + int nresrv; /* sectors */ + int nfats; /* usually 2 */ + int rootsize; /* number of entries */ + int volsize; /* in sectors */ + int mediadesc; + int fatsize; /* size of a fat (in sectors) */ + int fatbytes; /* size of a fat (in bytes) */ + int fatclusters; /* no. of clusters governed by fat */ + int fatbits; /* 12 or 16 */ + Devsize fataddr; /* sector address of first fat */ + Devsize rootaddr; /* sector address of root directory */ + Devsize dataaddr; /* sector address of first data block */ + Devsize freeptr; /* for cluster allocation */ + + Dosfile root; +}; + +struct Dosdir{ + uchar name[8]; + uchar ext[3]; + uchar attr; + uchar reserved[10]; + uchar time[2]; + uchar date[2]; + uchar start[2]; + uchar length[4]; +}; + +enum{ + FAT12 = 0x01, + FAT16 = 0x04, + EXTEND = 0x05, + FATHUGE = 0x06, + FAT32 = 0x0b, + FAT32X = 0x0c, + EXTHUGE = 0x0f, + DMDDO = 0x54, + PLAN9 = 0x39, + LEXTEND = 0x85, +}; + +enum{ + DRONLY = 0x01, + DHIDDEN = 0x02, + DSYSTEM = 0x04, + DVLABEL = 0x08, + DOSDIR = 0x10, + DARCH = 0x20, +}; + +#define GSHORT(p) (((p)[1]<<8)|(p)[0]) +#define GLONG(p) ((GSHORT(p+2)<<16)|GSHORT(p)) +#define GLSHORT(p) (((p)[0]<<8)|(p)[1]) +#define GLLONG(p) ((GLSHORT(p)<<16)|GLSHORT(p+2)) + +extern int dosinit(Dos*); +extern Dosfile* dosopen(Dos*, char*, Dosfile*); +extern int dostrunc(Dosfile*); +extern long dosread(Dosfile*, void*, long); +extern long doswrite(Dosfile*, void*, long); + +extern Dos dos; --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,1100 @@ +/* + * Realtek RTL8110S/8169S. + * Mostly there. There are some magic register values used + * which are not described in any datasheet or driver but seem + * to be necessary. + * No tuning has been done. 
Only tested on an RTL8110S, there + * are slight differences between the chips in the series so some + * tweaks may be needed. + */ +#include "all.h" +#include "io.h" +#include "../ip/ip.h" +#include "etherif.h" +//#include "ethermii.h" + +#define dprint(...) print("ether 8169: " __VA_ARGS__); + +enum { /* registers */ + Idr0 = 0x00, /* MAC address */ + Mar0 = 0x08, /* Multicast address */ + Dtccr = 0x10, /* Dump Tally Counter Command */ + Tnpds = 0x20, /* Transmit Normal Priority Descriptors */ + Thpds = 0x28, /* Transmit High Priority Descriptors */ + Flash = 0x30, /* Flash Memory Read/Write */ + Erbcr = 0x34, /* Early Receive Byte Count */ + Ersr = 0x36, /* Early Receive Status */ + Cr = 0x37, /* Command Register */ + Tppoll = 0x38, /* Transmit Priority Polling */ + Imr = 0x3C, /* Interrupt Mask */ + Isr = 0x3E, /* Interrupt Status */ + Tcr = 0x40, /* Transmit Configuration */ + Rcr = 0x44, /* Receive Configuration */ + Tctr = 0x48, /* Timer Count */ + Mpc = 0x4C, /* Missed Packet Counter */ + Cr9346 = 0x50, /* 9346 Command Register */ + Config0 = 0x51, /* Configuration Register 0 */ + Config1 = 0x52, /* Configuration Register 1 */ + Config2 = 0x53, /* Configuration Register 2 */ + Config3 = 0x54, /* Configuration Register 3 */ + Config4 = 0x55, /* Configuration Register 4 */ + Config5 = 0x56, /* Configuration Register 5 */ + Timerint = 0x58, /* Timer Interrupt */ + Mulint = 0x5C, /* Multiple Interrupt Select */ + Phyar = 0x60, /* PHY Access */ + Tbicsr0 = 0x64, /* TBI Control and Status */ + Tbianar = 0x68, /* TBI Auto-Negotiation Advertisment */ + Tbilpar = 0x6A, /* TBI Auto-Negotiation Link Partner */ + Phystatus = 0x6C, /* PHY Status */ + + Rms = 0xDA, /* Receive Packet Maximum Size */ + Cplusc = 0xE0, /* C+ Command */ + Coal = 0xE2, /* Interrupt Mitigation (Coalesce) */ + Rdsar = 0xE4, /* Receive Descriptor Start Address */ + Etx = 0xEC, /* Early Transmit Threshold */ +}; + +enum { /* Dtccr */ + Cmd = 0x00000008, /* Command */ +}; + +enum { /* Cr */ + Te = 
0x04, /* Transmitter Enable */ + Re = 0x08, /* Receiver Enable */ + Rst = 0x10, /* Software Reset */ +}; + +enum { /* Tppoll */ + Fswint = 0x01, /* Forced Software Interrupt */ + Npq = 0x40, /* Normal Priority Queue polling */ + Hpq = 0x80, /* High Priority Queue polling */ +}; + +enum { /* Imr/Isr */ + Rok = 0x0001, /* Receive OK */ + Rer = 0x0002, /* Receive Error */ + Tok = 0x0004, /* Transmit OK */ + Ter = 0x0008, /* Transmit Error */ + Rdu = 0x0010, /* Receive Descriptor Unavailable */ + Punlc = 0x0020, /* Packet Underrun or Link Change */ + Fovw = 0x0040, /* Receive FIFO Overflow */ + Tdu = 0x0080, /* Transmit Descriptor Unavailable */ + Swint = 0x0100, /* Software Interrupt */ + Timeout = 0x4000, /* Timer */ + Serr = 0x8000, /* System Error */ +}; + +enum { /* Tcr */ + MtxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MtxdmaMASK = 0x00000700, + Mtxdmaunlimited = 0x00000700, + Acrc = 0x00010000, /* Append CRC (not) */ + Lbk0 = 0x00020000, /* Loopback Test 0 */ + Lbk1 = 0x00040000, /* Loopback Test 1 */ + Ifg2 = 0x00080000, /* Interframe Gap 2 */ + HwveridSHIFT = 23, /* Hardware Version ID */ + HwveridMASK = 0x7C800000, + Macv01 = 0x00000000, /* RTL8169 */ + Macv02 = 0x00800000, /* RTL8169S/8110S */ + Macv03 = 0x04000000, /* RTL8169S/8110S */ + Macv04 = 0x10000000, /* RTL8169SB/8110SB */ + Macv05 = 0x18000000, /* RTL8169SC/8110SC */ + Macv07 = 0x24800000, /* RTL8102e */ + Macv07a = 0x34800000, /* RTL8102e */ + Macv11 = 0x30000000, /* RTL8168B/8111B */ + Macv12 = 0x38000000, /* RTL8169B/8111B */ + Macv13 = 0x34000000, /* RTL8101E */ + Macv14 = 0x30800000, /* RTL8100E */ + Macv15 = 0x38800000, /* RTL8100E */ + Macv19 = 0x3c000000, /* RTL8111c-gr */ + Macv25 = 0x28000000, /* RTL8168D */ + Macv26 = 0x48000000, /* RTL8111/8168B */ + Macv27 = 0x2c800000, /* RTL8111e */ + Macv28 = 0x2c000000, /* RTL8111/8168B */ + Macv29 = 0x40800000, /* RTL8101/8102E */ + Ifg0 = 0x01000000, /* Interframe Gap 0 */ + Ifg1 = 0x02000000, /* Interframe Gap 1 */ +}; + +enum { /* Rcr */ + Aap = 
0x00000001, /* Accept All Packets */ + Apm = 0x00000002, /* Accept Physical Match */ + Am = 0x00000004, /* Accept Multicast */ + Ab = 0x00000008, /* Accept Broadcast */ + Ar = 0x00000010, /* Accept Runt */ + Aer = 0x00000020, /* Accept Error */ + Sel9356 = 0x00000040, /* 9356 EEPROM used */ + MrxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MrxdmaMASK = 0x00000700, + Mrxdmaunlimited = 0x00000700, + RxfthSHIFT = 13, /* Receive Buffer Length */ + RxfthMASK = 0x0000E000, + Rxfth256 = 0x00008000, + Rxfthnone = 0x0000E000, + Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */ + MulERINT = 0x01000000, /* Multiple Early Interrupt Select */ +}; + +enum { /* Cr9346 */ + Eedo = 0x01, /* */ + Eedi = 0x02, /* */ + Eesk = 0x04, /* */ + Eecs = 0x08, /* */ + Eem0 = 0x40, /* Operating Mode */ + Eem1 = 0x80, +}; + +enum { /* Phyar */ + DataMASK = 0x0000FFFF, /* 16-bit GMII/MII Register Data */ + DataSHIFT = 0, + RegaddrMASK = 0x001F0000, /* 5-bit GMII/MII Register Address */ + RegaddrSHIFT = 16, + PhyFlag = 0x80000000, /* */ +}; + +enum { /* Phystatus */ + Fd = 0x01, /* Full Duplex */ + Linksts = 0x02, /* Link Status */ + Speed10 = 0x04, /* */ + Speed100 = 0x08, /* */ + Speed1000 = 0x10, /* */ + Rxflow = 0x20, /* */ + Txflow = 0x40, /* */ + Entbi = 0x80, /* */ +}; + +enum { /* Cplusc */ + Mulrw = 0x0008, /* PCI Multiple R/W Enable */ + Dac = 0x0010, /* PCI Dual Address Cycle Enable */ + Rxchksum = 0x0020, /* Receive Checksum Offload Enable */ + Rxvlan = 0x0040, /* Receive VLAN De-tagging Enable */ + Endian = 0x0200, /* Endian Mode */ +}; + +typedef struct D D; /* Transmit/Receive Descriptor */ +struct D { + u32int control; + u32int vlan; + u32int addrlo; + u32int addrhi; +}; + +enum { /* Transmit Descriptor control */ + TxflMASK = 0x0000FFFF, /* Transmit Frame Length */ + TxflSHIFT = 0, + Tcps = 0x00010000, /* TCP Checksum Offload */ + Udpcs = 0x00020000, /* UDP Checksum Offload */ + Ipcs = 0x00040000, /* IP Checksum Offload */ + Lgsen = 0x08000000, /* TSO; WARNING: contains lark's 
vomit */ +}; + +enum { /* Receive Descriptor control */ + RxflMASK = 0x00001FFF, /* Receive Frame Length */ + Tcpf = 0x00004000, /* TCP Checksum Failure */ + Udpf = 0x00008000, /* UDP Checksum Failure */ + Ipf = 0x00010000, /* IP Checksum Failure */ + Pid0 = 0x00020000, /* Protocol ID0 */ + Pid1 = 0x00040000, /* Protocol ID1 */ + Crce = 0x00080000, /* CRC Error */ + Runt = 0x00100000, /* Runt Packet */ + Res = 0x00200000, /* Receive Error Summary */ + Rwt = 0x00400000, /* Receive Watchdog Timer Expired */ + Fovf = 0x00800000, /* FIFO Overflow */ + Bovf = 0x01000000, /* Buffer Overflow */ + Bar = 0x02000000, /* Broadcast Address Received */ + Pam = 0x04000000, /* Physical Address Matched */ + Mar = 0x08000000, /* Multicast Address Received */ +}; + +enum { /* General Descriptor control */ + Ls = 0x10000000, /* Last Segment Descriptor */ + Fs = 0x20000000, /* First Segment Descriptor */ + Eor = 0x40000000, /* End of Descriptor Ring */ + Own = 0x80000000, /* Ownership */ +}; + +/* + */ +enum { /* Ring sizes (<= 1024) */ + Ntd = 64, /* Transmit Ring */ + Nrd = 256, /* Receive Ring */ + + Stdbuf = 1536, + Mtu = 7000, /* performance limited */ + Mps = Mtu + 8 + 14, /* if(mtu>ETHERMAXTU) */ +// Mps = ROUNDUP(ETHERMAXTU+4, 128), +}; + +typedef struct Dtcc Dtcc; +struct Dtcc { + u64int txok; + u64int rxok; + u64int txer; + u32int rxer; + u16int misspkt; + u16int fae; + u32int tx1col; + u32int txmcol; + u64int rxokph; + u64int rxokbrd; + u32int rxokmu; + u16int txabt; + u16int txundrn; +}; + +enum { /* Variants */ + Rtl8100e = (0x8136<<16)|0x10EC, /* RTL810[01]E: pci -e */ + Rtl8169c = (0x0116<<16)|0x16EC, /* RTL8169C+ (USR997902) */ + Rtl8169sc = (0x8167<<16)|0x10EC, /* RTL8169SC */ + Rtl8168b = (0x8168<<16)|0x10EC, /* RTL8168B: pci-e */ + Rtl8169 = (0x8169<<16)|0x10EC, /* RTL8169 */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + int port; + Pcidev* pcidev; + Ctlr* next; + int active; + + QLock alock; /* attach */ + Lock ilock; /* init */ + int init; /* */ + + 
int pciv; /* */ + int macv; /* MAC version */ + int phyv; /* PHY version */ + int pcie; /* flag: pci-express device? */ + +// Mii* mii; + + Lock tlock; /* transmit */ + D* td; /* descriptor ring */ + Msgbuf** tb; /* transmit buffers */ + int ntd; + + int tdh; /* head - producer index (host) */ + int tdt; /* tail - consumer index (NIC) */ + int ntdfree; + int ntq; + + Lock rlock; /* receive */ + D* rd; /* descriptor ring */ + Msgbuf** rb; /* receive buffers */ + int nrd; + + int rdh; /* head - producer index (NIC) */ + int rdt; /* tail - consumer index (host) */ + int nrdfree; + + int tcr; /* transmit configuration register */ + int rcr; /* receive configuration register */ + int imr; + + QLock slock; /* statistics */ + Dtcc* dtcc; + uint txdu; + uint tcpf; + uint udpf; + uint ipf; + uint fovf; + uint ierrs; + uint rer; + uint rdu; + uint punlc; + uint fovw; +} Ctlr; + +static Ctlr* rtl8169ctlrhead; +static Ctlr* rtl8169ctlrtail; + +#define csr8r(c, r) (inb((c)->port+(r))) +#define csr16r(c, r) (ins((c)->port+(r))) +#define csr32r(c, r) (inl((c)->port+(r))) +#define csr8w(c, r, b) (outb((c)->port+(r), (u8int)(b))) +#define csr16w(c, r, w) (outs((c)->port+(r), (u16int)(w))) +#define csr32w(c, r, l) (outl((c)->port+(r), (u32int)(l))) + +#ifdef notdef +static int +·rtl8169miimir(Ctlr *ctlr, int pa, int ra) +{ + uint r; + int timeo; + + assert(pa == 1); + r = (ra<<16) & RegaddrMASK; + csr32w(ctlr, Phyar, r); + delay(1); + for(timeo = 0; timeo < 2000; timeo++){ + if((r = csr32r(ctlr, Phyar)) & Flag) + break; + microdelay(100); + } + if(!(r & Flag)) + return -1; + + return (r & DataMASK)>>DataSHIFT; +} + +static int +rtl8169miimir(Mii *mii, int pa, int ra) +{ + if(pa != 1) + return -1; + return ·rtl8169miimir(mii->ctlr, pa, ra); +} + +static int +·rtl8169miimiw(Ctlr *ctlr, int pa, int ra, int data) +{ + uint r; + int timeo; + + assert(pa == 1); + r = Flag|((ra<<16) & RegaddrMASK)|((data<ctlr, pa, ra, data); +} + +static Mii* +rtl8169mii(Ctlr* ctlr) +{ + Mii* mii; + MiiPhy 
*phy; + + /* + * Link management. + * + * Get rev number out of Phyidr2 so can config properly. + * There's probably more special stuff for Macv0[234] needed here. + */ + ctlr->phyv = ·rtl8169miimir(ctlr, 1, Phyidr2) & 0x0F; + if(ctlr->macv == Macv02){ + csr8w(ctlr, 0x82, 1); /* magic */ + ·rtl8169miimiw(ctlr, 1, 0x0B, 0x0000); /* magic */ + } + if((mii = miiattach(ctlr, (1<<1), rtl8169miimir, rtl8169miimiw)) == nil) + return nil; + + phy = mii->curphy; + switch(ctlr->macv){ + case Macv28: + rtl8169miimiw(ctlr->mii, 1, 0x1f, 0); /* power up phy */ + rtl8169miimiw(ctlr->mii, 1, 0x1e, 0); + } + dprint("oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n", + phy->oui, phy->phyno, ctlr->macv, ctlr->phyv); + + if(miistatus(mii) < 0){ + miireset(mii); + miiane(mii, ~0, ~0, ~0); + } + + return mii; +} + +#endif + +static void +rtl8169halt(Ctlr* ctlr) +{ + csr8w(ctlr, Cr, 0); + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); +} + +static int +rtl8169reset(Ctlr* ctlr) +{ + u32int r; + int timeo; + + /* + * Soft reset the controller. + */ + csr8w(ctlr, Cr, Rst); + for(r = timeo = 0; timeo < 1000; timeo++){ + r = csr8r(ctlr, Cr); + if(!(r & Rst)) + break; + delay(1); + } + rtl8169halt(ctlr); + + if(r & Rst) + return -1; + return 0; +} + +static void +rtl8169replenish(Ctlr* ctlr) +{ + D *d; + int rdt; + Msgbuf *bp; + + rdt = ctlr->rdt; + while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){ + d = &ctlr->rd[rdt]; + if(ctlr->rb[rdt] == nil){ + /* + * Simple allocation for now. + * This better be aligned on 8. 
+			 */
+			bp = mballoc(Mps, 0, Mbeth1);
+			if(bp == nil){
+				print("no available buffers\n");
+				break;
+			}
+			ctlr->rb[rdt] = bp;
+			d->addrlo = Pciwaddrl(bp->data);
+			d->addrhi = Pciwaddrh(bp->data);
+		}else
+			print("i8169: rx overrun\n");
+		coherence();
+		d->control |= Own|Mtu;
+		rdt = NEXT(rdt, ctlr->nrd);
+		ctlr->nrdfree++;
+	}
+	ctlr->rdt = rdt;
+}
+
+/*
+ * halt the chip, reset both descriptor rings, refill the receive
+ * ring and program the controller registers, all under ctlr->ilock.
+ */
+static int
+rtl8169init(Ether* edev)
+{
+	int i;
+	u32int r;
+	Msgbuf *bp;
+	Ctlr *ctlr;
+	u16int cplusc;	/* Cplusc is read and written as 16 bits */
+
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+
+	rtl8169halt(ctlr);
+
+	/*
+	 * MAC Address is not settable on some (all?) chips.
+	 * Must put chip into config register write enable mode.
+	 */
+	csr8w(ctlr, Cr9346, Eem1|Eem0);
+
+	/*
+	 * Transmitter.
+	 */
+	memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+	ctlr->tdh = ctlr->tdt = 0;
+	ctlr->td[ctlr->ntd-1].control = Eor;
+
+	/*
+	 * Receiver.
+	 * Need to do something here about the multicast filter.
+	 */
+	memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+	ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+	ctlr->rd[ctlr->nrd-1].control = Eor;
+
+	/* drop buffers left from an earlier init before refilling */
+	for(i = 0; i < ctlr->nrd; i++)
+		if((bp = ctlr->rb[i]) != nil){
+			ctlr->rb[i] = nil;
+			mbfree(bp);
+		}
+	rtl8169replenish(ctlr);
+	ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm;
+
+	/*
+	 * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+	 * settings in Tcr/Rcr; the (1<<14) is magic.
+	 * cplusc must be 16 bits wide or the (1<<14) set below
+	 * is lost before it reaches the register.
+	 */
+	cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+	cplusc |= Rxchksum | Mulrw;
+	switch(ctlr->macv){
+	default:
+		panic("8169init: unknown macv: %.8ux", ctlr->macv);
+	case Macv01:
+		break;
+	case Macv02:
+	case Macv03:
+		cplusc |= 1<<14;			/* magic */
+		break;
+	case Macv05:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver, it could
+		 * be wrong. Untested.
+ */ + r = csr8r(ctlr, Config2) & 0x07; + if(r == 0x01) /* 66MHz PCI */ + csr32w(ctlr, 0x7C, 0x0007FFFF); /* magic */ + else + csr32w(ctlr, 0x7C, 0x0007FF00); /* magic */ + pciclrmwi(ctlr->pcidev); + break; + case Macv13: + /* + * This is interpreted from clearly bogus code + * in the manufacturer-supplied driver, it could + * be wrong. Untested. + */ + pcicfgw8(ctlr->pcidev, 0x68, 0x00); /* magic */ + pcicfgw8(ctlr->pcidev, 0x69, 0x08); /* magic */ + break; + case Macv04: + case Macv07: + case Macv07a: + case Macv11: + case Macv12: + case Macv14: + case Macv15: + case Macv19: + case Macv25: + case Macv26: + case Macv27: + case Macv28: + case Macv29: + break; + } + + /* + * Enable receiver/transmitter. + * Need to do this first or some of the settings below + * won't take. + */ + switch(ctlr->pciv){ + default: + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + csr32w(ctlr, Mar0, 0); + csr32w(ctlr, Mar0+4, 0); + case Rtl8169sc: + case Rtl8168b: + break; + } + + /* + * Interrupts. + * Disable Tdu|Tok for now, the transmit routine will tidy. + * Tdu means the NIC ran out of descriptors to send, so it + * doesn't really need to ever be on. + */ + csr32w(ctlr, Timerint, 0); + ctlr->imr = Serr|Timeout|Fovw|Punlc|Rdu|Ter|Rer|Rok; + csr16w(ctlr, Imr, ctlr->imr); + + /* + * Clear missed-packet counter; + * clear early transmit threshold value; + * set the descriptor ring base addresses; + * set the maximum receive packet size; + * no early-receive interrupts. + * + * note: the maximum rx size is a filter. the size of the buffer + * in the descriptor ring is still honored. we will toss >Mtu + * packets because they've been fragmented into multiple + * rx buffers. 
+ */ + csr32w(ctlr, Mpc, 0); + csr8w(ctlr, Etx, 0x3f); + csr32w(ctlr, Tnpds+4, Pciwaddrh(ctlr->td)); + csr32w(ctlr, Tnpds, Pciwaddrl(ctlr->td)); + csr32w(ctlr, Rdsar+4, Pciwaddrh(ctlr->rd)); + csr32w(ctlr, Rdsar, Pciwaddrl(ctlr->rd)); + csr16w(ctlr, Rms, Mtu); /* was Mps; see above comment */ + r = csr16r(ctlr, Mulint) & 0xF000; /* no early rx interrupts */ + csr16w(ctlr, Mulint, r); + csr16w(ctlr, Cplusc, cplusc); + csr16w(ctlr, Coal, 0); + + /* + * Set configuration. + */ + switch(ctlr->pciv){ + case Rtl8169sc: + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + break; + case Rtl8168b: + case Rtl8169c: + csr16w(ctlr, Cplusc, 0x2000); /* magic */ + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + break; + } + ctlr->tcr = csr32r(ctlr, Tcr); + csr8w(ctlr, Cr9346, 0); + + iunlock(&ctlr->ilock); + +// rtl8169mii(ctlr); + + return 0; +} + +static void +rtl8169attach(Ether* edev) +{ +// int timeo, firsta; + Ctlr *ctlr; +// MiiPhy *phy; + + ctlr = edev->ctlr; +// firsta = 0; + qlock(&ctlr->alock); + if(ctlr->init == 0){ + /* + * Handle allocation/init errors here. + */ + ctlr->td = ialloc(sizeof(D)*Ntd, 256); + ctlr->tb = ialloc(Ntd*sizeof(Msgbuf*), 0); + ctlr->ntd = Ntd; + ctlr->rd = ialloc(sizeof(D)*Nrd, 256); + ctlr->rb = ialloc(Nrd*sizeof(Msgbuf*), 0); + ctlr->nrd = Nrd; + ctlr->dtcc = ialloc(sizeof(Dtcc), 64); + rtl8169init(edev); + ctlr->init = 1; +// firsta = 1; + } + qunlock(&ctlr->alock); + + /* + * Wait for link to be ready. why here? 
+ */ +#ifdef notdef + if(firsta){ + for(timeo = 0; timeo < 350; timeo += 10){ + if(miistatus(ctlr->mii) == 0) + break; + tsleep(&up->sleep, return0, 0, 10); + } + phy = ctlr->mii->curphy; + dprint("%s: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, phy->rfc, phy->tfc); + } +#endif +} + +static void +rtl8169link(Ether* edev) +{ + USED(edev); + return; +#ifdef notdef + int limit; + Ctlr *ctlr; + MiiPhy *phy; + + ctlr = edev->ctlr; + + /* + * Maybe the link changed - do we care very much? + * Could stall transmits if no link, maybe? + */ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) + return; + + phy = ctlr->mii->curphy; + if(miistatus(ctlr->mii) < 0){ + dprint("%slink n: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + edev->link = 0; + return; + } + edev->link = 1; + + limit = 256*1024; + if(phy->speed == 10){ + edev->mbps = 10; + limit = 65*1024; + } + else if(phy->speed == 100) + edev->mbps = 100; + else if(phy->speed == 1000) + edev->mbps = 1000; + dprint("%slink y: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + + if(edev->oq != nil) + qsetlimit(edev->oq, limit); +#endif +} + +static void +rtl8169transmit(Ether* edev) +{ + D *d; + Msgbuf *bp; + Ctlr *ctlr; + int control, x; + + ctlr = edev->ctlr; + + ilock(&ctlr->tlock); + for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){ + d = &ctlr->td[x]; + if((control = d->control) & Own) + break; + + /* + * Check errors and log here. + */ + USED(control); + + /* + * Free it up. + * Need to clean the descriptor here? Not really. + * Simple freeb for now (no chain and freeblist). + * Use ntq count for now. 
+		 */
+		mbfree(ctlr->tb[x]);
+		ctlr->tb[x] = nil;
+		d->control &= Eor;
+
+		ctlr->ntq--;
+	}
+	ctlr->tdh = x;
+
+	x = ctlr->tdt;
+	while(ctlr->ntq < (ctlr->ntd-1)){
+		if((bp = etheroq(edev)) == nil)
+			break;
+
+		d = &ctlr->td[x];
+		d->addrlo = Pciwaddrl(bp->data);
+		d->addrhi = Pciwaddrh(bp->data);
+		ctlr->tb[x] = bp;
+		coherence();
+		d->control |= Own|Fs|Ls|bp->count;
+
+		x = NEXT(x, ctlr->ntd);
+		ctlr->ntq++;
+	}
+	if(x != ctlr->tdt){
+		ctlr->tdt = x;
+		csr8w(ctlr, Tppoll, Npq);
+	}
+	else if(ctlr->ntq >= (ctlr->ntd-1))
+		ctlr->txdu++;
+
+	iunlock(&ctlr->tlock);
+}
+
+/*
+ * Drain the receive ring.  Starting at ctlr->rdh, handle every
+ * descriptor whose Own bit is clear.  A frame held in a single
+ * descriptor (Fs and Ls set, Res clear) is passed to etheriq with
+ * its length taken from the descriptor less 4 bytes (presumably
+ * the trailing CRC - confirm); any other buffer is freed.
+ * rtl8169replenish is called whenever ctlr->nrdfree drops below
+ * half of ctlr->nrd.  ctlr->rdh is updated on return.
+ */
+static void
+rtl8169receive(Ether* edev)
+{
+	D *d;
+	int rdh;
+	Msgbuf *bp;
+	Ctlr *ctlr;
+	u32int control;
+
+	ctlr = edev->ctlr;
+
+	rdh = ctlr->rdh;
+	for(;;){
+		d = &ctlr->rd[rdh];
+
+		if(d->control & Own)
+			break;
+
+		control = d->control;
+
+		/*
+		 * Detach the buffer from the ring before disposing of
+		 * it, so that rb[] never keeps a pointer to a Msgbuf
+		 * that has been queued upstream or freed.
+		 * NOTE(review): rtl8169replenish is not visible here;
+		 * presumably it only refills empty slots - confirm.
+		 */
+		bp = ctlr->rb[rdh];
+		ctlr->rb[rdh] = nil;
+		if((control & (Fs|Ls|Res)) == (Fs|Ls)){
+			bp->count = (control & RxflMASK)-4;
+			bp->next = nil;
+
+			if(control & Fovf)
+				ctlr->fovf++;
+
+			/*
+			 * Pid1/Pid0 select which checksum the chip
+			 * examined.  A failure bit only bumps a
+			 * counter; otherwise the matching B*ck flag
+			 * is set on the buffer.
+			 */
+			switch(control & (Pid1|Pid0)){
+			default:
+				break;
+			case Pid0:
+				if(control & Tcpf){
+					ctlr->tcpf++;
+					break;
+				}
+				bp->flags |= Btcpck;
+				break;
+			case Pid1:
+				if(control & Udpf){
+					ctlr->udpf++;
+					break;
+				}
+				bp->flags |= Budpck;
+				break;
+			case Pid1|Pid0:
+				if(control & Ipf){
+					ctlr->ipf++;
+					break;
+				}
+				bp->flags |= Bipck;
+				break;
+			}
+			etheriq(edev, bp);
+		}
+		else{
+			// if(!(control & Res))
+			//	ctlr->frag++;
+			/* iprint("i8169: control %#.8ux\n", control); */
+			mbfree(bp);
+		}
+		d->control &= Eor;
+		ctlr->nrdfree--;
+		rdh = NEXT(rdh, ctlr->nrd);
+
+		if(ctlr->nrdfree < ctlr->nrd/2)
+			rtl8169replenish(ctlr);
+	}
+	ctlr->rdh = rdh;
+}
+
+static void
+rtl8169interrupt(Ureg*, void* arg)
+{
+	Ctlr *ctlr;
+	Ether *edev;
+	u32int isr;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+
+	while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){
+		csr16w(ctlr, Isr, isr);
+		if((isr & ctlr->imr) == 0)
+			break;
+		if(isr & (Fovw|Punlc|Rdu|Rer|Rok)){
+			rtl8169receive(edev);
+			if(!(isr
& (Punlc|Rok))) + ctlr->ierrs++; + if(isr & Rer) + ctlr->rer++; + if(isr & Rdu) + ctlr->rdu++; + if(isr & Punlc) + ctlr->punlc++; + if(isr & Fovw) + ctlr->fovw++; + isr &= ~(Fovw|Rdu|Rer|Rok); + } + + if(isr & (Tdu|Ter|Tok)){ + rtl8169transmit(edev); + isr &= ~(Tdu|Ter|Tok); + } + + if(isr & Punlc){ + rtl8169link(edev); + isr &= ~Punlc; + } + + /* + * Some of the reserved bits get set sometimes... + */ + if(isr & (Serr|Timeout|Tdu|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok)) + panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux", + csr16r(ctlr, Imr), isr); + } +} + +int +vetmacv(Ctlr *ctlr, uint *macv) +{ + *macv = csr32r(ctlr, Tcr) & HwveridMASK; + switch(*macv){ + default: + return -1; + case Macv01: + case Macv02: + case Macv03: + case Macv04: + case Macv05: + case Macv07: + case Macv07a: + case Macv11: + case Macv12: + case Macv13: + case Macv14: + case Macv15: + case Macv19: + case Macv25: + case Macv26: + case Macv27: + case Macv28: + case Macv29: + break; + } + return 0; +} + +static void +rtl8169pci(void) +{ + Pcidev *p; + Ctlr *ctlr; + int i, port, pcie; + uint macv; + + p = nil; + while(p = pcimatch(p, 0, 0)){ + pcie = 0; + switch(i = ((p->did<<16)|p->vid)){ + default: + continue; + case Rtl8100e: /* RTL810[01]E ? 
*/ + case Rtl8168b: /* RTL8168B */ + pcie = 1; + break; + case Rtl8169c: /* RTL8169C */ + case Rtl8169sc: /* RTL8169SC */ + case Rtl8169: /* RTL8169 */ + break; + case (0xC107<<16)|0x1259: /* Corega CG-LAPCIGT */ + i = Rtl8169; + break; + } + + port = p->mem[0].bar & ~0x01; +// if(ioalloc(port, p->mem[0].size, 0, "rtl8169") < 0){ +// print("rtl8169: port %#ux in use\n", port); +// continue; +// } + + ctlr = ialloc(sizeof(Ctlr), 0); + ctlr->port = port; + ctlr->pcidev = p; + ctlr->pciv = i; + ctlr->pcie = pcie; + + if(vetmacv(ctlr, &macv) == -1){ +// iofree(port); +// free(ctlr); + print("rtl8169: unknown mac %.4ux %.8ux\n", p->did, macv); + continue; + } + +#ifdef notdef + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + } +#endif + + if(rtl8169reset(ctlr)){ +// iofree(port); +// free(ctlr); + continue; + } + + /* + * Extract the chip hardware version, + * needed to configure each properly. + */ + ctlr->macv = macv; + +// rtl8169mii(ctlr); + + pcisetbme(p); + + if(rtl8169ctlrhead != nil) + rtl8169ctlrtail->next = ctlr; + else + rtl8169ctlrhead = ctlr; + rtl8169ctlrtail = ctlr; + } +} + +int +rtl8169pnp(Ether* edev) +{ + u32int r; + Ctlr *ctlr; + uchar ea[Easize]; + static int once; + + if(once == 0){ + once = 1; + rtl8169pci(); + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 100; + switch(ctlr->macv){ + case Macv28: + edev->ifc.maxmtu = 1514; + break; + default: + edev->ifc.maxmtu = Mtu; + } + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the device and set in edev->ea. + */ + memset(ea, 0, Easize); + if(memcmp(ea, edev->ea, Easize) == 0){ + r = csr32r(ctlr, Idr0); + edev->ea[0] = r; + edev->ea[1] = r>>8; + edev->ea[2] = r>>16; + edev->ea[3] = r>>24; + r = csr32r(ctlr, Idr0+4); + edev->ea[4] = r; + edev->ea[5] = r>>8; + } + + edev->attach = rtl8169attach; + edev->transmit = rtl8169transmit; + edev->interrupt = rtl8169interrupt; + rtl8169link(edev); + + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,1737 @@ +/* + * Intel 8256[367], 8257[1-9], 8258[03], i21[01], i350 + * Gigabit Ethernet PCI-Express Controllers + * Coraid EtherDrive® hba + */ +#include "all.h" +#include "io.h" +#include "../ip/ip.h" +#include "etherif.h" + +/* this is pretty gross */ +#define Block Msgbuf +#define freeb(b) mbfree(b) +#define BLEN(b) ((b)->count) +#define rp data +#define iprint(...) print(__VA_ARGS__) + +/* + * note: the 82575, 82576 and 82580 are operated using registers aliased + * to the 82563-style architecture. many features seen in the 82598 + * are also seen in the 82575 part. 
+ */ + +enum { + /* General */ + + Ctrl = 0x0000, /* Device Control */ + Status = 0x0008, /* Device Status */ + Eec = 0x0010, /* EEPROM/Flash Control/Data */ + Eerd = 0x0014, /* EEPROM Read */ + Ctrlext = 0x0018, /* Extended Device Control */ + Fla = 0x001c, /* Flash Access */ + Mdic = 0x0020, /* MDI Control */ + Fcal = 0x0028, /* Flow Control Address Low */ + Fcah = 0x002C, /* Flow Control Address High */ + Fct = 0x0030, /* Flow Control Type */ + Kumctrlsta = 0x0034, /* Kumeran Control and Status Register */ + Connsw = 0x0034, /* copper / fiber switch control; 82575/82576 */ + Vet = 0x0038, /* VLAN EtherType */ + Fcttv = 0x0170, /* Flow Control Transmit Timer Value */ + Txcw = 0x0178, /* Transmit Configuration Word */ + Rxcw = 0x0180, /* Receive Configuration Word */ + Ledctl = 0x0E00, /* LED control */ + Pba = 0x1000, /* Packet Buffer Allocation */ + Pbs = 0x1008, /* Packet Buffer Size */ + + /* Interrupt */ + + Icr = 0x00C0, /* Interrupt Cause Read */ + Itr = 0x00c4, /* Interrupt Throttling Rate */ + Ics = 0x00C8, /* Interrupt Cause Set */ + Ims = 0x00D0, /* Interrupt Mask Set/Read */ + Imc = 0x00D8, /* Interrupt mask Clear */ + Iam = 0x00E0, /* Interrupt acknowledge Auto Mask */ + Eitr = 0x1680, /* Extended itr; 82575/6 80 only */ + + /* Receive */ + + Rctl = 0x0100, /* Control */ + Ert = 0x2008, /* Early Receive Threshold (573[EVL], 82578 only) */ + Fcrtl = 0x2160, /* Flow Control RX Threshold Low */ + Fcrth = 0x2168, /* Flow Control Rx Threshold High */ + Psrctl = 0x2170, /* Packet Split Receive Control */ + Drxmxod = 0x2540, /* dma max outstanding bytes (82575) */ + Rdbal = 0x2800, /* Rdesc Base Address Low Queue 0 */ + Rdbah = 0x2804, /* Rdesc Base Address High Queue 0 */ + Rdlen = 0x2808, /* Descriptor Length Queue 0 */ + Srrctl = 0x280c, /* split and replication rx control (82575) */ + Rdh = 0x2810, /* Descriptor Head Queue 0 */ + Rdt = 0x2818, /* Descriptor Tail Queue 0 */ + Rdtr = 0x2820, /* Descriptor Timer Ring */ + Rxdctl = 0x2828, /* Descriptor 
Control */ + Radv = 0x282C, /* Interrupt Absolute Delay Timer */ + Rsrpd = 0x2c00, /* Small Packet Detect */ + Raid = 0x2c08, /* ACK interrupt delay */ + Cpuvec = 0x2c10, /* CPU Vector */ + Rxcsum = 0x5000, /* Checksum Control */ + Rmpl = 0x5004, /* rx maximum packet length (82575) */ + Rfctl = 0x5008, /* Filter Control */ + Mta = 0x5200, /* Multicast Table Array */ + Ral = 0x5400, /* Receive Address Low */ + Rah = 0x5404, /* Receive Address High */ + Vfta = 0x5600, /* VLAN Filter Table Array */ + Mrqc = 0x5818, /* Multiple Receive Queues Command */ + + /* Transmit */ + + Tctl = 0x0400, /* Transmit Control */ + Tipg = 0x0410, /* Transmit IPG */ + Tkabgtxd = 0x3004, /* glci afe band gap transmit ref data, or something */ + Tdbal = 0x3800, /* Tdesc Base Address Low */ + Tdbah = 0x3804, /* Tdesc Base Address High */ + Tdlen = 0x3808, /* Descriptor Length */ + Tdh = 0x3810, /* Descriptor Head */ + Tdt = 0x3818, /* Descriptor Tail */ + Tidv = 0x3820, /* Interrupt Delay Value */ + Txdctl = 0x3828, /* Descriptor Control */ + Tadv = 0x382C, /* Interrupt Absolute Delay Timer */ + Tarc0 = 0x3840, /* Arbitration Counter Queue 0 */ + + /* Statistics */ + + Statistics = 0x4000, /* Start of Statistics Area */ + Gorcl = 0x88/4, /* Good Octets Received Count */ + Gotcl = 0x90/4, /* Good Octets Transmitted Count */ + Torl = 0xC0/4, /* Total Octets Received */ + Totl = 0xC8/4, /* Total Octets Transmitted */ + Nstatistics = 0x124/4, +}; + +enum { /* Ctrl */ + Lrst = 1<<3, /* link reset */ + Slu = 1<<6, /* Set Link Up */ + Devrst = 1<<26, /* Device Reset */ + Rfce = 1<<27, /* Receive Flow Control Enable */ + Tfce = 1<<28, /* Transmit Flow Control Enable */ + Phyrst = 1<<31, /* Phy Reset */ +}; + +enum { /* Status */ + Lu = 1<<1, /* Link Up */ + Lanid = 3<<2, /* mask for Lan ID. 
*/ + Txoff = 1<<4, /* Transmission Paused */ + Tbimode = 1<<5, /* TBI Mode Indication */ + Phyra = 1<<10, /* PHY Reset Asserted */ + GIOme = 1<<19, /* GIO Master Enable Status */ +}; + +enum { + /* Eec */ + Nvpres = 1<<8, /* nvram present */ + Autord = 1<<9, /* autoread complete */ + Sec1val = 1<<22, /* sector 1 valid (!sec0) */ +}; + +enum { /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ +}; + +enum { /* Ctrlext */ + Eerst = 1<<13, /* EEPROM Reset */ + Linkmode = 3<<22, /* linkmode */ + Internalphy = 0<<22, /* " internal phy (copper) */ + Sgmii = 2<<22, /* " sgmii */ + Serdes = 3<<22, /* " serdes */ +}; + +enum { + /* Connsw */ + Enrgirq = 1<<2, /* interrupt on power detect (enrgsrc) */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* phy interface */ + Phyctl = 0, /* phy ctl register */ + Phyisr = 19, /* 82563 phy interrupt status register */ + Phylhr = 19, /* 8257[12] link health register */ + Physsr = 17, /* phy secondary status register */ + Phyprst = 193<<8 | 17, /* 8256[34] phy port reset */ + Phyier = 18, /* 82573 phy interrupt enable register */ + Phypage = 22, /* 8256[34] page register */ + Phystat = 26, /* 82580 phy status */ + Phyapage = 29, + Phy79page = 31, /* 82579 phy page register (all pages) */ + + Rtlink = 1<<10, /* realtime link status */ + Phyan = 1<<11, /* phy has autonegotiated */ + + /* Phyctl bits */ + Ran = 1<<9, /* restart auto negotiation */ + Ean = 1<<12, /* enable auto negotiation */ + + /* Phyprst bits */ + Prst = 1<<0, /* reset the port 
*/ + + /* 82573 Phyier bits */ + Lscie = 1<<10, /* link status changed ie */ + Ancie = 1<<11, /* auto negotiation complete ie */ + Spdie = 1<<14, /* speed changed ie */ + Panie = 1<<15, /* phy auto negotiation error ie */ + + /* Phylhr/Phyisr bits */ + Anf = 1<<6, /* lhr: auto negotiation fault */ + Ane = 1<<15, /* isr: auto negotiation error */ + + /* 82580 Phystat bits */ + Ans = 1<<14 | 1<<15, /* 82580 autoneg. status */ + Link = 1<<6, /* 82580 Link */ + + /* Rxcw builtin serdes */ + Anc = 1<<31, + Rxsynch = 1<<30, + Rxcfg = 1<<29, + Rxcfgch = 1<<28, + Rxcfgbad = 1<<27, + Rxnc = 1<<26, + + /* Txcw */ + Txane = 1<<31, + Txcfg = 1<<30, +}; + +enum { /* fiber (pcs) interface */ + Pcsctl = 0x4208, /* pcs control */ + Pcsstat = 0x420c, /* pcs status */ + + /* Pcsctl bits */ + Pan = 1<<16, /* autonegotiate */ + Prestart = 1<<17, /* restart an (self clearing) */ + + /* Pcsstat bits */ + Linkok = 1<<0, /* link is okay */ + Andone = 1<<16, /* an phase is done see below for success */ + Anbad = 1<<19 | 1<<20, /* Anerror | Anremfault */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt; !82575/6/80 only */ + Rxdw = 0x00000080, /* Rdesc write back; 82575/6/80 only */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfgset = 0x00000400, /* Receiving /C/ ordered sets */ + Ack = 0x00020000, /* Receive ACK frame */ + Omed = 1<<20, /* media change; pcs interface */ +}; + +enum { /* Txcw */ + TxcwFd = 0x00000020, /* Full Duplex */ + TxcwHd = 0x00000040, /* Half Duplex */ + TxcwPauseMASK = 0x00000180, /* Pause */ + TxcwPauseSHIFT = 7, + TxcwPs = 1<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr 
*i82563ctlr; +static Rbpool rbtab[Npool]; + +static char *statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-mtu Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + "Management Packets Rx", + "Management Packets Drop", + "Management Packets Tx", + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-mtu Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", + "Interrupt Assertion", + "Interrupt Rx Pkt Timer", + "Interrupt Rx Abs Timer", + "Interrupt Tx Pkt Timer", + "Interrupt Tx Abs Timer", + "Interrupt Tx Queue Empty", + "Interrupt Tx Desc Low", + "Interrupt Rx Min", + "Interrupt Rx Overrun", +}; + +static char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} 
+ +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Block* +i82563rballoc(Rbpool *p) +{ + Block *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + b->flags &= ~FREE; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Block *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->flags |= FREE; + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(0) + iprint("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Block *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Block *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Block *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Block *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Block *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Block *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Block *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Block *b) +{ + rbfree(b, 7); +} + +static void +rbfree8(Block *b) +{ + rbfree(b, 8); +} + +static void +rbfree9(Block *b) +{ + rbfree(b, 9); +} + +static Freefn freetab[Npool] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + rbfree6, + rbfree7, + rbfree8, + rbfree9, +}; + +static int +newpool(void) +{ + static int seq; + + if(seq == nelem(freetab)) + return -1; + if(freetab[seq] == nil){ + print("82563: bad freetab\n"); + return -1; + } + return seq++; +} + +static void +i82563im(Ctlr *ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static void +i82563txinit(Ctlr *ctlr) +{ + int i; + u32int r; + Block *b; + + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Tctl, 0x0F<tdba)); + csr32w(ctlr, Tdbah, 
Pciwaddrh(ctlr->tdba)); + csr32w(ctlr, Tdlen, ctlr->ntd * sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + for(i = 0; i < ctlr->ntd; i++){ + if((b = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(b); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + csr32w(ctlr, Tidv, 128); + csr32w(ctlr, Tadv, 64); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) | Ten); + r = csr32r(ctlr, Txdctl) & ~WthreshMASK; + r |= 4<type].flag & F75) + r |= Enable; + csr32w(ctlr, Txdctl, r); +} + +#define Next(x, m) (((x)+1) & (m)) + +static int +i82563cleanup(Ether *e) +{ + Block *b; + Ctlr *c; + int tdh, m, n; + + c = e->ctlr; + tdh = c->tdh; + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status & Tdd){ + tdh = n; + if((b = c->tb[tdh]) != nil){ + c->tb[tdh] = nil; + freeb(b); + }else + iprint("#l%d: %s tx underrun! %d\n", e->ctlrno, cname(c), n); + c->tdba[tdh].status = 0; + } + + return c->tdh = tdh; +} + +static int +notrim(void *v) +{ + Ctlr *c; + + c = v; + return (c->im & Txdw) == 0; +} + +static void +i82563tproc(void) +{ + Td *td; + Block *bp; + Ether *edev; + Ctlr *ctlr; + int tdh, tdt, m; + + edev = u->arg; + ctlr = edev->ctlr; + tdt = ctlr->tdt; + m = ctlr->ntd-1; + + for(;;){ + tdh = i82563cleanup(edev); + + if(Next(tdt, m) == tdh){ + ctlr->txdw++; + i82563im(ctlr, Txdw); + sleep(&ctlr->trendez, notrim, ctlr); + continue; + } +Msgbuf* etheroq1(Ether*, int); + bp = etheroq1(edev, 0); + td = &ctlr->tdba[tdt]; + td->addr[0] = Pciwaddrl(bp->rp); + td->addr[1] = Pciwaddrh(bp->rp); + td->control = Ide|Rs|Ifcs|Teop|BLEN(bp); + ctlr->tb[tdt] = bp; + tdt = Next(tdt, m); +extern void sfence(void); + sfence(); + csr32w(ctlr, Tdt, tdt); + } +} + +static int +i82563replenish(Ctlr *ctlr, int maysleep) +{ + uint rdt, m, i; + Block *bp; + Rbpool *p; + Rd *rd; + + rdt = ctlr->rdt; + m = ctlr->nrd-1; + p = rbtab + ctlr->pool; + i = 0; + for(; Next(rdt, m) != ctlr->rdh; rdt = Next(rdt, m)){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] 
!= nil){ + iprint("%s: tx overrun\n", cname(ctlr)); + break; + } + redux: + bp = i82563rballoc(p); + if(bp == nil){ + if(rdt - ctlr->rdh >= 16) + break; + print("%s: pool %d: no rx buffers\n", cname(ctlr), ctlr->pool); + if(maysleep == 0) + return -1; + ilock(p); + p->starve = 1; + iunlock(p); + sleep(p, icansleep, p); + goto redux; + } + i++; + ctlr->rb[rdt] = bp; + rd->addr[0] = Pciwaddrl(bp->rp); + rd->addr[1] = Pciwaddrh(bp->rp); + rd->status = 0; + ctlr->rdfree++; + } + if(i != 0){ + ctlr->rdt = rdt; + csr32w(ctlr, Rdt, rdt); + } + return 0; +} + +static void +i82563rxinit(Ctlr *ctlr) +{ + int i; + Block *bp; + + if(ctlr->rbsz <= 2048) + csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF); + else{ + i = ctlr->rbsz / 1024; + if(ctlr->rbsz % 1024) + i++; + if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rctl, Lpe|Dpf|Bsize2048|Bam|RdtmsHALF|Secrc); + if(ctlr->type != i82575) + i |= (ctlr->nrd/2>>4)<<20; /* RdmsHalf */ + csr32w(ctlr, Srrctl, i | Dropen); + csr32w(ctlr, Rmpl, ctlr->rbsz); +// csr32w(ctlr, Drxmxod, 0x7ff); + }else + csr32w(ctlr, Rctl, Lpe|Dpf|BsizeFlex*i|Bam|RdtmsHALF|Secrc); + } + + if(cttab[ctlr->type].flag & Fert) + csr32w(ctlr, Ert, 1024/8); + + if(ctlr->type == i82566) + csr32w(ctlr, Pbs, 16); + + csr32w(ctlr, Rdbal, Pciwaddrl(ctlr->rdba)); + csr32w(ctlr, Rdbah, Pciwaddrh(ctlr->rdba)); + csr32w(ctlr, Rdlen, ctlr->nrd * sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 0; //25; + ctlr->radv = 0; //500; + csr32w(ctlr, Rdtr, ctlr->rdtr); + csr32w(ctlr, Radv, ctlr->radv); + + for(i = 0; i < ctlr->nrd; i++) + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Rxdctl, 1<rim != 0; +} + +static void +i82563rproc(void) +{ + uint m, rdh, rim, im; + Block *bp; + Ctlr *ctlr; + Ether *edev; + Rd *rd; + + edev = u->arg; + ctlr = edev->ctlr; + + i82563rxinit(ctlr); + csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren); + 
if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable); + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + }else + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + m = ctlr->nrd-1; + + for(;;){ + i82563im(ctlr, im); + ctlr->rsleep++; + i82563replenish(ctlr, 1); + sleep(&ctlr->rrendez, i82563rim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + rim = ctlr->rim; + ctlr->rim = 0; + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + bp = ctlr->rb[rdh]; + if((rd->status & Reop) && rd->errors == 0){ + bp->count = rd->length; + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flags |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). 
+ */ + ctlr->tcpcs++; + bp->flags |= Btcpck|Budpck; + } + bp->flags |= Bpktck; + } + etheriq(edev, bp); + } else + freeb(bp); + ctlr->rb[rdh] = nil; + rd->status = 0; + ctlr->rdfree--; + ctlr->rdh = rdh = Next(rdh, m); + if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0)) + if(i82563replenish(ctlr, 0) == -1) + break; + } + } +} + +static int +i82563lim(void *v) +{ + return ((Ctlr*)v)->lim != 0; +} + +static int speedtab[] = { + 10, 100, 1000, 0 +}; + +static uint phywrite0(Ctlr*, int, int, ushort); + +static uint +setpage(Ctlr *c, uint phyno, uint p, uint r) +{ + uint pr; + + switch(c->type){ + case i82563: + if(r >= 16 && r <= 28 && r != 22) + pr = Phypage; + else if(r == 30 || r == 31) + pr = Phyapage; + else + return 0; + return phywrite0(c, phyno, pr, p); + case i82579: + return phywrite0(c, phyno, Phy79page, p<<5); + default: + if(p == 0) + return 0; + return ~0; + } +} + +static uint +phyread0(Ctlr *c, int phyno, int reg) +{ + uint phy, i; + + csr32w(c, Mdic, MDIrop | phyno<type].name, phyno, phy); + return ~0; + } + return phy & 0xffff; +} + +static uint +phyread(Ctlr *c, uint phyno, uint reg) +{ + if(setpage(c, phyno, reg>>8, reg & 0xff) == ~0){ + print("%s: phyread: bad phy page %d\n", cname(c), reg>>8); + return ~0; + } + return phyread0(c, phyno, reg & 0xff); +} + +static uint +phywrite0(Ctlr *c, int phyno, int reg, ushort val) +{ + uint phy, i; + + csr32w(c, Mdic, MDIwop | phyno<>8, reg & 0xff) == ~0) + panic("%s: bad phy reg %.4ux", cname(c), reg); + return phywrite0(c, phyno, reg & 0xff, v); +} + +static void +phyerrata(Ether *e, Ctlr *c, uint phyno) +{ + if(e->mbps == 0) + if(c->phyerrata == 0){ + c->phyerrata++; + phywrite(c, phyno, Phyprst, Prst); /* try a port reset */ + print("%s: phy port reset\n", cname(c)); + } + else + c->phyerrata = 0; +} + +static void +i82563attach(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + qlock(&ctlr->alock); + if(ctlr->alloc != nil){ + qunlock(&ctlr->alock); + return; + } + + ctlr->nrd = Nrd; + ctlr->ntd = 
Ntd; + ctlr->alloc = ialloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 255, 0); + if(ctlr->alloc == nil){ + qunlock(&ctlr->alock); + panic("i82563: no memory"); + } + ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 256); + ctlr->tdba = (Td*)(ctlr->rdba + ctlr->nrd); + + ctlr->rb = ialloc(ctlr->nrd * sizeof(Block*), 0); + ctlr->tb = ialloc(ctlr->ntd * sizeof(Block*), 0); + + mballocpool(Nrb, ctlr->rbsz, Rbalign, Mbeth82563, freetab[ctlr->pool]); + + snprint(ctlr->rname, sizeof ctlr->rname, "#l%dr", edev->ctlrno); + userinit(i82563rproc, edev, ctlr->rname); + + i82563txinit(ctlr); + snprint(ctlr->tname, sizeof ctlr->tname, "#l%dt", edev->ctlrno); + userinit(i82563tproc, edev, ctlr->tname); + + qunlock(&ctlr->alock); +} + +static void +i82563interrupt(Ureg*, void *arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + + while(icr = csr32r(ctlr, Icr) & ctlr->im){ + if(icr & (Lsc | Omed)){ + im &= ~(Lsc | Omed); + ctlr->lim = icr & (Lsc | Omed); + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack)){ + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + ctlr->tintr++; + wakeup(&ctlr->trendez); + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); +} + +static int +i82563detach(Ctlr *ctlr) +{ + int r, timeo; + + /* balance rx/tx packet buffer; survives reset */ + if(ctlr->rbsz > 8192 && cttab[ctlr->type].flag & Fpba){ + ctlr->pba = csr32r(ctlr, Pba); + r = ctlr->pba >> 16; + r += ctlr->pba & 0xffff; + r >>= 1; + csr32w(ctlr, Pba, r); + }else if(ctlr->type == i82573 && ctlr->rbsz > 1514) + csr32w(ctlr, Pba, 14); + ctlr->pba = csr32r(ctlr, Pba); + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal 
registers.
+	 */
+	csr32w(ctlr, Imc, ~0);
+	csr32w(ctlr, Rctl, 0);
+	csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) & ~Ten);
+
+	delay(10);
+
+	r = csr32r(ctlr, Ctrl);
+	if(ctlr->type == i82566 || ctlr->type == i82579)
+		r |= Phyrst;
+	csr32w(ctlr, Ctrl, Devrst | r);
+	delay(1);
+	/* wait up to ~1s for the reset bits to read back as clear */
+	for(timeo = 0;; timeo++){
+		if((csr32r(ctlr, Ctrl) & (Devrst|Phyrst)) == 0)
+			break;
+		if(timeo >= 1000)
+			return -1;
+		delay(1);
+	}
+
+	r = csr32r(ctlr, Ctrl);
+	csr32w(ctlr, Ctrl, Slu|r);
+
+	r = csr32r(ctlr, Ctrlext);
+	csr32w(ctlr, Ctrlext, r|Eerst);
+	delay(1);
+	for(timeo = 0; timeo < 1000; timeo++){
+		if(!(csr32r(ctlr, Ctrlext) & Eerst))
+			break;
+		delay(1);
+	}
+	if(csr32r(ctlr, Ctrlext) & Eerst)
+		return -1;
+
+	/*
+	 * Mask everything and wait for the interrupt causes to
+	 * drain.  Only the "/C/ ordered sets" cause is tolerated;
+	 * its Icr bit is Rxcfgset.  (Rxcfg is an Rxcw bit and does
+	 * not belong in an Icr mask.)
+	 */
+	csr32w(ctlr, Imc, ~0);
+	delay(1);
+	for(timeo = 0; timeo < 1000; timeo++){
+		if((csr32r(ctlr, Icr) & ~Rxcfgset) == 0)
+			break;
+		delay(1);
+	}
+	if(csr32r(ctlr, Icr) & ~Rxcfgset)
+		return -1;
+
+	return 0;
+}
+
+/* shutdown hook: reset the controller; the result is ignored */
+static void
+i82563shutdown(Ether *edev)
+{
+	i82563detach(edev->ctlr);
+}
+
+/*
+ * Read one 16-bit word at word address adr through the Eerd
+ * register.  Spins until EEdone is set; there is no timeout.
+ */
+static ushort
+eeread(Ctlr *ctlr, int adr)
+{
+	csr32w(ctlr, Eerd, EEstart | adr << 2);
+	while ((csr32r(ctlr, Eerd) & EEdone) == 0)
+		;
+	return csr32r(ctlr, Eerd) >> 16;
+}
+
+/*
+ * Copy the first 0x40 words of the eeprom into ctlr->eeprom and
+ * return their sum truncated to 16 bits.
+ */
+static int
+eeload(Ctlr *ctlr)
+{
+	u16int sum;
+	int data, adr;
+
+	sum = 0;
+	for (adr = 0; adr < 0x40; adr++) {
+		data = eeread(ctlr, adr);
+		ctlr->eeprom[adr] = data;
+		sum += data;
+	}
+	return sum;
+}
+
+/*
+ * Prepare the flash interface for a new cycle.  Returns -1 if
+ * Fvalid is not set, or if Scip is still set after polling it
+ * ten times at 1ms intervals; 0 otherwise.
+ */
+static int
+fcycle(Ctlr*, Flash *f)
+{
+	u16int s, i;
+
+	s = f->reg[Fsts];
+	if((s&Fvalid) == 0)
+		return -1;
+	f->reg[Fsts] |= Fcerr | Ael;	/* presumably write-1-to-clear; confirm */
+	for(i = 0; i < 10; i++){
+		if((s&Scip) == 0)
+			return 0;
+		delay(1);
+		s = f->reg[Fsts];
+	}
+	return -1;
+}
+
+/*
+ * Read the 16-bit word at flash address ladr.  Returns -1 if the
+ * cycle cannot be started or completes with Fcerr or Ael set.
+ * The wait for Fdone has no timeout.
+ */
+static int
+fread(Ctlr *c, Flash *f, int ladr)
+{
+	u16int s;
+
+	delay(1);
+	if(fcycle(c, f) == -1)
+		return -1;
+	f->reg[Fsts] |= Fdone;
+	f->reg32[Faddr] = ladr;
+
+	/* setup flash control register */
+	s = f->reg[Fctl] & ~0x3ff;
+	f->reg[Fctl] = s | 1<<8 | Fgo;	/* 2 byte read */
+
+	while((f->reg[Fsts] & Fdone) == 0)
+		;
+	if(f->reg[Fsts] & (Fcerr|Ael))
+		return -1;
+	return f->reg32[Fdata] & 0xffff;
+}
+
+/*
+ * Load the first 0x40 nvm words from the flash behind the second
+ * memory BAR (pcidev->mem[1]) into c->eeprom.  The starting
+ * sector comes from Bfpr; when Eec reports Sec1val the upper half
+ * of the [base, lim] range is used instead.
+ * Returns the sum of the words truncated to 16 bits, or -1 if the
+ * flash cannot be mapped or a read fails.  The mapping is always
+ * released before returning.
+ */
+static int
+fload(Ctlr *c)
+{
+	uint data, io, r, adr;
+	u16int sum;
+	Flash f;
+
+	io = c->pcidev->mem[1].bar & ~0x0f;
+	f.reg = vmap(io, c->pcidev->mem[1].size);
+	if(f.reg == nil)
+		return -1;
+	f.reg32 = (u32int*)f.reg;
+	f.base = f.reg32[Bfpr] & 0x1fff;
+	f.lim = f.reg32[Bfpr]>>16 & 0x1fff;
+	if(csr32r(c, Eec) & Sec1val)
+		f.base += f.lim+1 - f.base >> 1;
+	r = f.base << 12;
+	sum = 0;
+	for(adr = 0; adr < 0x40; adr++) {
+		data = fread(c, &f, r + adr*2);
+		if(data == -1){
+			/* don't leak the mapping on a failed read */
+			vunmap(f.reg, c->pcidev->mem[1].size);
+			return -1;
+		}
+		c->eeprom[adr] = data;
+		sum += data;
+	}
+	vunmap(f.reg, c->pcidev->mem[1].size);
+	return sum;
+}
+
+static void
+defaultea(Ctlr *ctlr, uchar *ra)
+{
+	uint i, r;
+	uvlong u;
+	static uchar nilea[Eaddrlen];
+
+	if(memcmp(ra, nilea, Eaddrlen) != 0)
+		return;
+	if(cttab[ctlr->type].flag & Fflashea){
+		/* intel mb bug */
+		u = (uvlong)csr32r(ctlr, Rah)<<32u | (uint)csr32r(ctlr, Ral);
+		for(i = 0; i < Eaddrlen; i++)
+			ra[i] = u >> 8*i;
+	}
+	if(memcmp(ra, nilea, Eaddrlen) != 0)
+		return;
+	for(i = 0; i < Eaddrlen/2; i++){
+		ra[2*i] = ctlr->eeprom[Ea+i];
+		ra[2*i+1] = ctlr->eeprom[Ea+i] >> 8;
+	}
+	r = (csr32r(ctlr, Status) & Lanid) >> 2;
+	ra[5] += r;		/* ea ctlr[n] = ea ctlr[0]+n */
+}
+
+static int
+reset(Ctlr *ctlr)
+{
+	uchar *ra;
+	int i, r;
+
+	if(i82563detach(ctlr))
+		return -1;
+	if(cttab[ctlr->type].flag & Fload)
+		r = fload(ctlr);
+	else
+		r = eeload(ctlr);
+	if(r != 0 && r != 0xbaba){
+		print("%s: bad eeprom checksum - %#.4ux\n",
+			cname(ctlr), r);
+		return -1;
+	}
+
+	ra = ctlr->ra;
+	defaultea(ctlr, ra);
+	csr32w(ctlr, Ral, ra[3]<<24 | ra[2]<<16 | ra[1]<<8 | ra[0]);
+	csr32w(ctlr, Rah, 1<<31 | ra[5]<<8 | ra[4]);
+	for(i = 1; i < 16; i++){
+		csr32w(ctlr, Ral+i*8, 0);
+		csr32w(ctlr, Rah+i*8, 0);
+	}
+	memset(ctlr->mta, 0, sizeof(ctlr->mta));
+	for(i = 0; i < 128; i++)
+		csr32w(ctlr, Mta + i*4, 0);
+	csr32w(ctlr, Fcal, 0x00C28001);
+	csr32w(ctlr, Fcah, 0x0100);
+	if(ctlr->type != i82579 && ctlr->type != i210 && ctlr->type != i350)
+ csr32w(ctlr, Fct, 0x8808); + csr32w(ctlr, Fcttv, 0x0100); + csr32w(ctlr, Fcrtl, ctlr->fcrtl); + csr32w(ctlr, Fcrth, ctlr->fcrth); + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Eitr, 128<<2); /* 128 ¼ microsecond intervals */ + return 0; +} + +static int +didtype(int d) +{ + switch(d){ + case 0x1096: + case 0x10ba: /* “gilgal” */ + case 0x1098: /* serdes; not seen */ + case 0x10bb: /* serdes */ + return i82563; + case 0x1049: /* mm */ + case 0x104a: /* dm */ + case 0x104b: /* dc */ + case 0x104d: /* v “ninevah” */ + case 0x10bd: /* dm-2 */ + case 0x294c: /* ich 9 */ + return i82566; + case 0x10de: /* lm ich10d */ + case 0x10df: /* lf ich10 */ + case 0x10e5: /* lm ich9 */ + case 0x10f5: /* lm ich9m; “boazman” */ + return i82567; + case 0x10bf: /* lf ich9m */ + case 0x10cb: /* v ich9m */ + case 0x10cd: /* lf ich10 */ + case 0x10ce: /* v ich10 */ + case 0x10cc: /* lm ich10 */ + return i82567m; + case 0x105e: /* eb */ + case 0x105f: /* eb */ + case 0x1060: /* eb */ + case 0x10a4: /* eb */ + case 0x10a5: /* eb fiber */ + case 0x10bc: /* eb */ + case 0x10d9: /* eb serdes */ + case 0x10da: /* eb serdes “ophir” */ + return i82571; + case 0x107d: /* eb copper */ + case 0x107e: /* ei fiber */ + case 0x107f: /* ei */ + case 0x10b9: /* ei “rimon” */ + return i82572; + case 0x108b: /* e “vidalia” */ + case 0x108c: /* e (iamt) */ + case 0x109a: /* l “tekoa” */ + return i82573; + case 0x10d3: /* l or it; “hartwell” */ + return i82574; + case 0x10a7: + case 0x10a9: /* fiber/serdes */ + return i82575; + case 0x10c9: /* copper */ + case 0x10e6: /* fiber */ + case 0x10e7: /* serdes; “kawela” */ + case 0x150d: /* backplane */ + return i82576; + case 0x10ea: /* lc “calpella”; aka pch lan */ + return i82577; + case 0x10eb: /* lm “calpella” */ + return i82577m; + case 0x10ef: /* dc “piketon” */ + return i82578; + case 0x1502: /* lm */ + case 0x1503: /* v “lewisville” */ + return i82579; + case 0x10f0: /* dm “king's creek” */ + return i82578m; + case 0x150e: /* “barton hills” */ + 
case 0x150f: /* fiber */ + case 0x1510: /* backplane */ + case 0x1511: /* sfp */ + case 0x1516: + return i82580; + case 0x1506: /* v */ + return i82583; + case 0x1533: /* i210-t1 */ + case 0x1534: + case 0x1536: /* fiber */ + case 0x1538: + case 0x1539: /* i211 */ + case 0x153a: /* i217-lm */ + case 0x153b: /* i217-v */ + return i210; + case 0x151f: /* “powerville” eeprom-less */ + case 0x1521: /* copper */ + case 0x1522: /* fiber */ + case 0x1523: /* serdes */ + case 0x1524: /* sgmii */ + return i350; + } + return -1; +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static void +i82563pci(void) +{ + int type; + Ctlr *c, **cc; + Pcidev *p; + + cc = &i82563ctlr; + for(p = nil; p = pcimatch(p, 0x8086, 0);){ + hbafixup(p); + if((type = didtype(p->did)) == -1) + continue; + c = ialloc(sizeof *c, 0); + c->type = type; + c->pcidev = p; + c->rbsz = cttab[type].mtu; + c->port = p->mem[0].bar & ~0x0F; + *cc = c; + cc = &c->next; + } +} + +static int +setup(Ctlr *ctlr) +{ + Pcidev *p; + + if((ctlr->pool = newpool()) == -1){ + print("%s: no pool\n", cname(ctlr)); + return -1; + } + p = ctlr->pcidev; + ctlr->nic = vmap(ctlr->port, p->mem[0].size); + if(ctlr->nic == nil){ + print("%s: can't map %#P\n", cname(ctlr), ctlr->port); + return -1; + } + if(reset(ctlr)){ + vunmap(ctlr->nic, p->mem[0].size); + return -1; + } + pcisetbme(ctlr->pcidev); + return 0; +} + +static void +i82563transmit(Ether*) +{ +} + +int +i82563reset(Ether *edev) +{ + int type; + Ctlr *ctlr; + static int done; + + type = -1; + if(!done) { + i82563pci(); + done = 1; + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = i82563ctlr; ; ctlr = ctlr->next){ + if(ctlr == nil) + return -1; + if(ctlr->active) + continue; + if(type != -1 && ctlr->type != type) + continue; + if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){ + ctlr->active = 1; + memmove(ctlr->ra, edev->ea, Eaddrlen); + if(setup(ctlr) == 0) + break; + } + } + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->ifc.maxmtu = ctlr->rbsz; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = i82563attach; + edev->interrupt = i82563interrupt; + edev->transmit = i82563transmit; + +// edev->arg = edev; + + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,985 @@ +/* + * intel 10gbe pcie driver + * copyright © 2007—2012, coraid, inc. + */ +#include "all.h" +#include "io.h" +#include "../ip/ip.h" +#include "etherif.h" + +enum{ + /* general */ + Ctrl = 0x00000/4, /* Device Control */ + Status = 0x00008/4, /* Device Status */ + Ctrlext = 0x00018/4, /* Extended Device Control */ + Esdp = 0x00020/4, /* extended sdp control */ + Esodp = 0x00028/4, /* extended od sdp control */ + Ledctl = 0x00200/4, /* led control */ + Tcptimer = 0x0004c/4, /* tcp timer */ + Ecc = 0x110b0/4, /* errata ecc control magic */ + + /* nvm */ + Eec = 0x10010/4, /* eeprom/flash control */ + Eerd = 0x10014/4, /* eeprom read */ + Fla = 0x1001c/4, /* flash access */ + Flop = 0x1013c/4, /* flash opcode */ + Grc = 0x10200/4, /* general rx control */ + + /* interrupt */ + Icr = 0x00800/4, /* interrupt cause read */ + Ics = 0x00808/4, /* " set */ + Ims = 0x00880/4, /* " mask read/set */ + Imc = 0x00888/4, /* " mask clear */ + Iac = 0x00810/4, /* " auto clear */ + Iam = 0x00890/4, /* " auto mask enable */ + Itr = 0x00820/4, /* " throttling rate (0-19) */ + Ivar = 0x00900/4, /* " vector allocation regs. 
*/ + /*msi interrupt */ + Msixt = 0x0000/4, /* msix table (bar3) */ + Msipba = 0x2000/4, /* msix pending bit array (bar3) */ + Pbacl = 0x11068/4, /* pba clear */ + Gpie = 0x00898/4, /* general purpose int enable */ + + /* flow control */ + Pfctop = 0x03008/4, /* priority flow ctl type opcode */ + Fcttv = 0x03200/4, /* " transmit timer value (0-3) */ + Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */ + Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */ + Rcrtv = 0x032a0/4, /* " refresh value threshold */ + Tfcs = 0x0ce00/4, /* " tx status */ + + /* rx dma */ + Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */ + Rbah = 0x01004/4, /* " high */ + Rdlen = 0x01008/4, /* " length */ + Rdh = 0x01010/4, /* " head */ + Rdt = 0x01018/4, /* " tail */ + Rxdctl = 0x01028/4, /* " control */ + + Srrctl = 0x02100/4, /* split and replication rx ctl. */ + Dcarxctl = 0x02200/4, /* rx dca control */ + Rdrxctl = 0x02f00/4, /* rx dma control */ + Rxpbsize = 0x03c00/4, /* rx packet buffer size */ + Rxctl = 0x03000/4, /* rx control */ + Dropen = 0x03d04/4, /* drop enable control */ + + /* rx */ + Rxcsum = 0x05000/4, /* rx checksum control */ + Rfctl = 0x04008/4, /* rx filter control */ + Mta = 0x05200/4, /* multicast table array (0-127) */ + Ral = 0x05400/4, /* rx address low */ + Rah = 0x05404/4, + Psrtype = 0x05480/4, /* packet split rx type. */ + Vfta = 0x0a000/4, /* vlan filter table array. 
*/ + Fctrl = 0x05080/4, /* filter control */ + Vlnctrl = 0x05088/4, /* vlan control */ + Msctctrl = 0x05090/4, /* multicast control */ + Mrqc = 0x05818/4, /* multiple rx queues cmd */ + Vmdctl = 0x0581c/4, /* vmdq control */ + Imir = 0x05a80/4, /* immediate irq rx (0-7) */ + Imirext = 0x05aa0/4, /* immediate irq rx ext */ + Imirvp = 0x05ac0/4, /* immediate irq vlan priority */ + Reta = 0x05c00/4, /* redirection table */ + Rssrk = 0x05c80/4, /* rss random key */ + + /* tx */ + Tdbal = 0x06000/4, /* tx desc base low +0x40n */ + Tdbah = 0x06004/4, /* " high */ + Tdlen = 0x06008/4, /* " len */ + Tdh = 0x06010/4, /* " head */ + Tdt = 0x06018/4, /* " tail */ + Txdctl = 0x06028/4, /* " control */ + Tdwbal = 0x06038/4, /* " write-back address low */ + Tdwbah = 0x0603c/4, + + Dtxctl = 0x04a80/4, /* tx dma control !82598 */ + Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */ + Tipg = 0x0cb00/4, /* tx inter-packet gap */ + Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */ + + /* mac */ + Hlreg0 = 0x04240/4, /* highlander control reg 0 */ + Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */ + Msca = 0x0425c/4, /* mdi signal cmd & addr */ + Msrwd = 0x04260/4, /* mdi single rw data */ + Mhadd = 0x04268/4, /* mac addr high & max frame */ + Pcss1 = 0x04288/4, /* xgxs status 1 */ + Pcss2 = 0x0428c/4, + Xpcss = 0x04290/4, /* 10gb-x pcs status */ + Serdesc = 0x04298/4, /* serdes control */ + Macs = 0x0429c/4, /* fifo control & report */ + Autoc = 0x042a0/4, /* autodetect control & status */ + Links = 0x042a4/4, /* link status */ + Autoc2 = 0x042a8/4, +}; + +enum{ + /* Ctrl */ + Rst = 1<<26, /* full nic reset */ + + /* Txdctl */ + Ten = 1<<25, + + /* Dtxctl */ + Den = 1<<0, + + /* Fctrl */ + Rfce = 1<<15, /* rcv flow control enable */ + Dpf = 1<<13, /* discard pause frames */ + Bam = 1<<10, /* broadcast accept mode */ + Upe = 1<<9, /* unicast promiscuous */ + Mpe = 1<<8, /* multicast promiscuous */ + + /* Rxdctl */ + Pthresh = 0, /* prefresh threshold shift in bits */ + 
Hthresh = 8, /* host buffer minimum threshold " */ + Wthresh = 16, /* writeback threshold */ + Renable = 1<<25, + + /* Rxctl */ + Rxen = 1<<0, + Dmbyps = 1<<1, + + /* Rdrxctl */ + Rdmt½ = 0, + Rdmt¼ = 1, + Rdmt⅛ = 2, + + /* Rxcsum */ + Ippcse = 1<<12, /* ip payload checksum enable */ + + /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ + + /* interrupts */ + Irx0 = 1<<0, /* driver defined */ + Itx0 = 1<<1, /* driver defined */ + Lsc = 1<<20, /* link status change */ + Ioc = 1<<31, /* other cause */ + + /* Links */ + Lnkup = 1<<30, + Lnkspd8 = 1<<29, + Lnkspd9 = 3<<28, + + /* Hlreg0 */ + Txcrcen = 1<<0, + Jumboen = 1<<2, + + /* Ivar */ + Ivtx = 1|1<<7, /* transmit interrupt */ + Ivrx = 0|1<<7, /* receive interrupt */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlrtype Ctlrtype; +typedef struct Rd Rd; +typedef struct Rbpool Rbpool; +typedef struct Stat Stat; +typedef struct Td Td; + +enum { + i82598, + i82599, + x540, + Nctlrtype, +}; + +struct Ctlrtype { + int type; + int mtu; + int flag; + char *name; +}; + +enum { + Fphyoc = 1<<0, /* phy link needs other cause interrupt */ + Fsplitivar = 1<<1, /* tx and rx use different ivar entries */ + Fphyspd = 1<<2, /* phy speed useful (part supports <10gbe) */ + Ftxctl = 1<<3, /* part has txctl register */ +}; + +/* real mtu is 12k. 
use standard 9k to save memory */ +static Ctlrtype cttab[Nctlrtype] = { + i82598, 9*1024, Fsplitivar|Fphyoc, "i82598", + i82599, 9*1024, Fphyspd|Ftxctl, "i82599", + x540, 9*1024, Fphyspd|Ftxctl, "x540", +}; + +/* status */ +enum{ + Pif = 1<<7, /* past exact filter (sic) */ + Ipcs = 1<<6, /* ip checksum calcuated */ + L4cs = 1<<5, /* layer 2 */ + Tcpcs = 1<<4, /* tcp checksum calcuated */ + Vp = 1<<3, /* 802.1q packet matched vet */ + Ixsm = 1<<2, /* ignore checksum */ + Reop = 1<<1, /* end of packet */ + Rdd = 1<<0, /* descriptor done */ +}; + +struct Rd { + u32int addr[2]; + u16int length; + u16int cksum; + uchar status; + uchar errors; + u16int vlan; +}; + +enum{ + /* Td cmd */ + Rs = 1<<3, + Ic = 1<<2, + Ifcs = 1<<1, + Teop = 1<<0, + + /* Td status */ + Tdd = 1<<0, +}; + +struct Td { + u32int addr[2]; + u16int length; + uchar cso; + uchar cmd; + uchar status; + uchar css; + u16int vlan; +}; + +enum{ + Factive = 1<<0, + Fstarted = 1<<1, +}; + +typedef void (*Freefn)(Msgbuf*); + +struct Ctlr { + Pcidev *p; + uintmem port; + u32int *reg; + uchar flag; + uint poolno; + Rbpool *pool; + int nrd, ntd, nrb, rbsz; + QLock slock, alock, tlock; + Rendez lrendez, trendez, rrendez; + uint im, lim, rim, xtim; + Lock imlock; + char *alloc; + Rd *rdba; + Msgbuf **rb; + uint rdt, rdfree; + Td *tdba; + uint tdh, tdt; + Msgbuf **tb; + uchar ra[Easize]; + uchar mta[128]; +// uvlong stats[nelem(stattab)]; + int type; + uint speeds[4]; + uint nobufs; + + char tname[28]; + char rname[28]; +}; + +struct Rbpool { + union { + struct { + Lock; + Msgbuf *b; + uint nstarve; + uint nwakey; + uint starve; + Rendez; + }; + uchar pad[64]; /* cacheline */ + }; + union { + struct { + Msgbuf *x; + uint nfast; + uint nslow; + }; + uchar pad[64]; /* cacheline */ + }; +}; + +/* tweakable parameters */ +enum{ + Nrd = 256, + Ntd = 256, + Nrb = 2048, + Nctlr = 8, + Rbalign = 8, /* ideally, 4k */ +}; + +static Ctlr *ctlrtab[Nctlr]; +static Lock rblock[Nctlr]; +static Rbpool rbtab[Nctlr]; +static int 
nctlr; + +char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} + +static void +im(Ctlr *c, int i) +{ + ilock(&c->imlock); + c->im |= i; + c->reg[Ims] = c->im; + iunlock(&c->imlock); +} + +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Msgbuf* +rballoc(Rbpool *p) +{ + Msgbuf *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + b->flags &= ~FREE; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->starve = 1; + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Msgbuf *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->data = (uchar*)ROUNDUP((uintptr)b->xdata, Rbalign); + b->count = 0; + b->flags = FREE; + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(1) + print("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Msgbuf *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Msgbuf *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Msgbuf *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Msgbuf *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Msgbuf *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Msgbuf *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Msgbuf *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Msgbuf *b) +{ + rbfree(b, 7); +} + +static Freefn freetab[Nctlr] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + rbfree6, + rbfree7, +}; + +#define Next(x, m) (((x)+1) & (m)) +static int +cleanup(Ctlr *c, int tdh) +{ + Msgbuf *b; + uint m, n; + + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status&Tdd){ + tdh = n; + b = c->tb[tdh]; + c->tb[tdh] = 0; + mbfree(b); + c->tdba[tdh].status = 0; + } + return tdh; +} + +static void +transmit(Ether *e) +{ + uint i, m, tdt, tdh; + Ctlr *c; + Msgbuf *b; + 
Td *t; + + c = e->ctlr; +// qlock(&c->tlock); + if(!canqlock(&c->tlock)){ + im(c, Itx0); + return; + } + tdh = c->tdh = cleanup(c, c->tdh); + tdt = c->tdt; + m = c->ntd-1; + for(i = 0; i<8; i++){ + if(Next(tdt, m) == tdh){ + im(c, Itx0); + break; + } + if((b = etheroq(e)) == nil) + break; + t = c->tdba+tdt; + t->addr[0] = Pciwaddrl(b->data); + t->addr[1] = Pciwaddrh(b->data); + t->length = b->count; + t->cmd = Rs|Ifcs|Teop; + c->tb[tdt] = b; + tdt = Next(tdt, m); + } + if(i){ + c->tdt = tdt; + coherence(); + c->reg[Tdt] = tdt; + } + qunlock(&c->tlock); +} + +static int +xtim(void *c) +{ + return ((Ctlr*)c)->xtim != 0; +} + +static void +tproc(void) +{ + Ether *e; + Ctlr *c; + + e = u->arg; + c = e->ctlr; +loop: + sleep(&c->trendez, xtim, c); /* transmit kicks us */ + c->xtim = 0; + transmit(e); + goto loop; +} + +static void +rxinit(Ctlr *c) +{ + Msgbuf *b; + int i; + + c->reg[Rxctl] &= ~Rxen; + for(i = 0; inrd; i++){ + b = c->rb[i]; + c->rb[i] = 0; + if(b) + mbfree(b); + } + c->rdfree = 0; + + c->reg[Fctrl] |= Bam|Rfce|Dpf; + c->reg[Rxcsum] |= Ipcs; + c->reg[Srrctl] = (c->rbsz+1023)/1024; + c->reg[Mhadd] = c->rbsz<<16; + c->reg[Hlreg0] |= Txcrcen|Jumboen; + + c->reg[Rbal] = PCIWADDR(c->rdba); + c->reg[Rbah] = 0; + c->reg[Rdlen] = c->nrd*sizeof(Rd); + c->reg[Rdh] = 0; + c->reg[Rdt] = c->rdt = 0; + + c->reg[Rdrxctl] = Rdmt¼; + c->reg[Rxdctl] = 8<reg[Rxctl] |= Rxen|Dmbyps; +} + +static int +replenish(Ctlr *c, uint rdh, int maysleep) +{ + int rdt, m, i; + Msgbuf *b; + Rd *r; + Rbpool *p; + + m = c->nrd-1; + i = 0; + p = c->pool; + for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){ + r = c->rdba+rdt; + while((b = rballoc(c->pool)) == nil){ + c->nobufs++; + if(maysleep == 0) + goto nobufs; + if(1){ + print("%s:%d: starve\n", cname(c), c->poolno); + } + sleep(p, icansleep, p); + } + c->rb[rdt] = b; + r->addr[0] = Pciwaddrl(b->data); + r->addr[1] = Pciwaddrh(b->data); + r->status = 0; + c->rdfree++; + i++; + } +nobufs: + if(i){ + coherence(); + c->reg[Rdt] = 
c->rdt = rdt; + } + if(rdt == rdh) + return -1; + return 0; +} + +static int +rim(void *v) +{ + return ((Ctlr*)v)->rim != 0; +} + +static void +rproc(void) +{ + Ether *e; + Ctlr *c; + Msgbuf *b; + Rd *r; + uint m, rdh; + + e = u->arg; + c = e->ctlr; + m = c->nrd-1; + rdh = 0; +loop: + replenish(c, rdh, 1); + im(c, Irx0); + sleep(&c->rrendez, rim, c); +loop1: + c->rim = 0; + if(c->nrd-c->rdfree >= 16) + if(replenish(c, rdh, 0) == -1) + goto loop; + r = c->rdba+rdh; + if(!(r->status&Rdd)) + goto loop; + b = c->rb[rdh]; + c->rb[rdh] = 0; + b->count = r->length; + if(!(r->status&Ixsm)){ + if(r->status&Ipcs) + b->flags |= Bipck; + if(r->status&Tcpcs) + b->flags |= Btcpck|Budpck; + // b->checksum = r->cksum; + } + r->status = 0; + etheriq(e, b); + c->rdfree--; + rdh = Next(rdh, m); + goto loop1; +} + +static int +detach(Ctlr *c) +{ + int i; + + c->reg[Imc] = ~0; + c->reg[Ctrl] |= Rst; + for(i = 0; i < 100; i++){ + delay(1); + if((c->reg[Ctrl]&Rst) == 0) + goto good; + } + return -1; +good: + /* errata */ + delay(50); + c->reg[Ecc] &= ~(1<<21|1<<18|1<<9|1<<6); + + /* not cleared by reset; kill it manually. 
*/ + for(i = 1; i<16; i++) + c->reg[Rah] &= ~(1<<31); + for(i = 0; i<128; i++) + c->reg[Mta+i] = 0; + for(i = 1; i<640; i++) + c->reg[Vfta+i] = 0; + return 0; +} + +static void +shutdown(Ether *e) +{ + detach(e->ctlr); +} + +/* ≤ 20ms */ +static ushort +eeread(Ctlr *c, int i) +{ + c->reg[Eerd] = EEstart|i<<2; + while((c->reg[Eerd]&EEdone) == 0) + ; + return c->reg[Eerd]>>16; +} + +static int +eeload(Ctlr *c) +{ + ushort u, v, p, l, i, j; + + if((eeread(c, 0)&0xc0) != 0x40) + return -1; + u = 0; + for(i = 0; i < 0x40; i++) + u += eeread(c, i); + for(i = 3; i < 0xf; i++){ + if(c->type == x540 && (i == 4 || i == 5)) + continue; + p = eeread(c, i); + l = eeread(c, p++); + if((int)p+l+1 > 0xffff) + continue; + for(j = p; j < p+l; j++) + u += eeread(c, j); + } + if(u != 0xbaba) + return -1; + if(c->reg[Status]&1<<3) + u = eeread(c, 10); + else + u = eeread(c, 9); + u++; + for(i = 0; ira[i++] = v; + c->ra[i++] = v>>8; + } + c->ra[5] += (c->reg[Status]&0xc)>>2; + return 0; +} + +static int +reset(Ctlr *c) +{ + uchar *p; + int i; + + if(detach(c)){ + print("%s: reset timeout\n", cname(c)); + return -1; + } + if(eeload(c)){ + print("%s: eeprom failure\n", cname(c)); + return -1; + } + p = c->ra; + c->reg[Ral] = p[3]<<24|p[2]<<16|p[1]<<8|p[0]; + c->reg[Rah] = p[5]<<8|p[4]|1<<31; + + c->reg[Ctrlext] |= 1<<16; + /* make some guesses for flow control */ + c->reg[Fcrtl] = 0x10000|1<<31; + c->reg[Fcrth] = 0x40000|1<<31; + c->reg[Rcrtv] = 0x6000; + + /* configure interrupt mapping (don't ask) */ + if(cttab[c->type].flag & Fsplitivar){ + c->reg[Ivar+0] = Ivrx; + c->reg[Ivar+64/4] = Ivtx; +// c->reg[Ivar+97/4] = (2|1<<7)<<8*(97%4); + }else + c->reg[Ivar+0] = Ivtx<<8 | Ivrx; + + /* interrupt throttling goes here. 
*/ + for(i = Itr; ireg[i] = 128; /* ¼µs intervals */ + c->reg[Itr+Itx0] = 256; + return 0; +} + +static void +txinit(Ctlr *c) +{ + Msgbuf *b; + int i; + + c->reg[Txdctl] = 16<ntd; i++){ + b = c->tb[i]; + c->tb[i] = 0; + if(b) + mbfree(b); + } + memset(c->tdba, 0, c->ntd*sizeof(Td)); + c->reg[Tdbal] = PCIWADDR(c->tdba); + c->reg[Tdbah] = 0; + c->reg[Tdlen] = c->ntd*sizeof(Td); + c->reg[Tdh] = 0; + c->reg[Tdt] = 0; + c->tdh = c->ntd-1; + c->tdt = 0; + if(cttab[c->type].flag & Ftxctl) + c->reg[Dtxctl] |= Den; + c->reg[Txdctl] |= Ten; +} + +static void +attach(Ether *e) +{ + Ctlr *c; + int t; + + c = e->ctlr; + qlock(&c->alock); + if(c->alloc){ + qunlock(&c->alock); + return; + } + + c->nrd = Nrd; + c->ntd = Ntd; + t = c->nrd*sizeof *c->rdba+255; + t += c->ntd*sizeof *c->tdba+255; + t += (c->ntd+c->nrd)*sizeof(Msgbuf*); + c->alloc = ialloc(t, 0); + qunlock(&c->alock); + + c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256); + c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba+c->nrd), 256); + c->rb = (Msgbuf**)(c->tdba+c->ntd); + c->tb = (Msgbuf**)(c->rb+c->nrd); + + mballocpool(Nrb, c->rbsz+Rbalign, Rbalign, Mbeth10gbebg, freetab[c->poolno]); + + rxinit(c); + txinit(c); + + sprint(c->rname, "#l%dr", e->ctlrno); + userinit(rproc, e, c->rname); + sprint(c->tname, "#l%dt", e->ctlrno); + userinit(tproc, e, c->tname); +} + +static void +interrupt(Ureg*, void *v) +{ + Ether *e; + Ctlr *c; + int icr, im; + + e = v; + c = e->ctlr; + ilock(&c->imlock); + c->reg[Imc] = ~0; + im = c->im; + while(icr = c->reg[Icr]&c->im){ + if(icr&Lsc){ + im &= ~Lsc; + c->lim = icr&Lsc; + wakeup(&c->lrendez); + } + if(icr&Irx0){ + im &= ~Irx0; + c->rim = icr&Irx0; + wakeup(&c->rrendez); + } + if(icr&Itx0){ + im &= ~Itx0; + c->xtim = icr&Itx0; + wakeup(&c->trendez); + } + } + c->reg[Ims] = c->im = im; + iunlock(&c->imlock); +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static void +scan(void) +{ + char *name; 
+ uintmem io; + int type; + void *mem; + Ctlr *c; + Pcidev *p; + + p = 0; + while(p = pcimatch(p, 0x8086, 0)){ + hbafixup(p); + switch(p->did){ + case 0x10c6: /* 82598 af dual port */ + case 0x10c7: /* 82598 af single port */ + case 0x10b6: /* 82598 backplane */ + case 0x10dd: /* 82598 at cx4 */ + case 0x10ec: /* 82598 at cx4 */ + type = i82598; + break; + case 0x10f7: /* 82599 kx/kx4 */ + case 0x10f8: /* 82599 backplane */ + case 0x10f9: /* 82599 cx4 */ + case 0x10fb: /* 82599 sfi/sfp+ */ + case 0x10fc: /* 82599 xaui */ + case 0x151c: /* 82599 base t kx/kx4 “niantic” */ + type = i82599; + break; + case 0x1528: /* x540-at2 “twinville” */ + type = x540; + break; + default: + continue; + } + name = cttab[type].name; + if(nctlr == nelem(ctlrtab)){ + print("%s: %τ: too many controllers\n", name, p->tbdf); + return; + } + io = p->mem[0].bar&~0xf; + mem = vmap(io, p->mem[0].size); + if(mem == 0){ + print("%s: %τ: cant map bar\n", name, p->tbdf); + continue; + } + c = ialloc(sizeof *c, 0); + c->p = p; + c->port = io; + c->reg = (u32int*)mem; + c->rbsz = cttab[type].mtu; + c->type = type; + if(reset(c)){ + print("%s: %τ: cant reset\n", name, p->tbdf); + // free(c); + // vunmap(mem, p->mem[0].size); + continue; + } + pcisetbme(p); + c->poolno = nctlr; + c->pool = rbtab + c->poolno; + ctlrtab[nctlr++] = c; + } +} + +int +i82598pnp(Ether *e) +{ + Ctlr *c; + int i; + + if(nctlr == 0) + scan(); + for(i = 0; iflag&Factive) + continue; + if(ethercfgmatch(e, c->p, c->port) == 0) + goto found; + } + return -1; +found: + c->flag |= Factive; + e->ctlr = c; + e->port = (uintptr)c->reg; + e->irq = c->p->intl; + e->tbdf = c->p->tbdf; + e->mbps = 10000; + e->ifc.maxmtu = c->rbsz; + memmove(e->ea, c->ra, Easize); +// e->arg = e; + e->attach = attach; + e->interrupt = interrupt; + e->transmit = transmit; + + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,381 @@ +#include "all.h" +#include "io.h" + +#include "../ip/ip.h" 
+#include "../dev/aoe.h"
+#include "etherif.h"
+
+#define dprint(...)	/* print(__VA_ARGS__) */
+
+Ether etherif[MaxEther];
+int nether;
+
+/*
+ * driver entry point for received packets: append mb to the
+ * interface's input queue and wake the input proc.
+ */
+void
+etheriq(Ether* ether, Msgbuf* mb)
+{
+	ilock(&ether->rqlock);
+	if(ether->rqhead)
+		ether->rqtail->next = mb;
+	else
+		ether->rqhead = mb;
+	ether->rqtail = mb;
+	mb->next = 0;
+	iunlock(&ether->rqlock);
+
+	wakeup(&ether->rqr);
+}
+
+/* sleep condition for etheri: the input queue is not empty */
+static int
+isinput(void* arg)
+{
+	return ((Ether*)arg)->rqhead != 0;
+}
+
+/*
+ * input proc: attach the controller, then loop taking packets off
+ * the input queue and handing them to the protocol chosen by the
+ * ethernet type field.  packets of unknown type are freed without
+ * being counted.
+ */
+static void
+etheri(void)
+{
+	Ether *ether;
+	Ifc *ifc;
+	Msgbuf *mb;
+	Enpkt *p;
+
+	ether = u->arg;
+	ifc = &ether->ifc;
+	print("ether%di: %E %I\n", ether->ctlrno, ether->ifc.ea, ether->ifc.ipa);
+	ether->attach(ether);
+
+	for(;;) {
+		sleep(&ether->rqr, isinput, ether);
+
+		ilock(&ether->rqlock);
+		if(ether->rqhead == 0) {
+			iunlock(&ether->rqlock);
+			continue;
+		}
+		mb = ether->rqhead;
+		ether->rqhead = mb->next;
+		iunlock(&ether->rqlock);
+
+		p = (Enpkt*)mb->data;
+		switch(nhgets(p->type)){
+		case Arptype:
+			arpreceive(p, mb->count, ifc);
+			break;
+		case Cectype:
+			cecreceive(p, mb->count, ifc);
+			break;
+		case Aoetype:
+			aoereceive(p, mb->count, ifc);
+			break;
+		case Iptype:
+			ipreceive(p, mb->count, ifc);
+			break;
+		default:
+			goto done;
+		}
+		ifc->rxpkt++;
+		ifc->work.count++;
+		ifc->rate.count += mb->count;
+	done:
+		mbfree(mb);
+	}
+}
+
+#ifdef no
+static void
+ethero(void)
+{
+	Ether *ether;
+	Ifc *ifc;
+	Msgbuf *mb;
+	int len;
+
+	ether = u->arg;
+	ifc = &ether->ifc;
+	print("ether%do: %E %I\n", ether->ctlrno, ifc->ea, ifc->ipa);
+
+	for(;;) {
+		mb = recv(ifc->reply, 1);
+		if(mb == nil)
+			continue;
+
+		len = mb->count;
+		if(len > ether->ifc.maxmtu){
+			print("ether%do: pkt too big - %d\n", ether->ctlrno, len);
+			mbfree(mb);
+			continue;
+		}
+		if(len < ETHERMINTU) {
+			memset(mb->data+len, 0, ETHERMINTU-len);
+			mb->count = len = ETHERMINTU;
+		}
+		memmove(((Enpkt*)(mb->data))->s, ifc->ea, sizeof(ifc->ea));
+
+		ilock(&ether->tqlock);
+		if(ether->tqhead)
+			ether->tqtail->next = mb;
+		else
+			ether->tqhead = mb;
+		ether->tqtail = mb;
+
mb->next = 0; + iunlock(ðer->tqlock); + + ether->transmit(ether); + + ifc->work.count++; + ifc->rate.count += len; + ifc->txpkt++; + } +} + +Msgbuf* +etheroq(Ether* ether) +{ + Msgbuf *mb; + + mb = nil; + ilock(ðer->tqlock); + if(ether->tqhead){ + mb = ether->tqhead; + ether->tqhead = mb->next; + } + iunlock(ðer->tqlock); + + return mb; +} +#endif + +/* + * look, ma. no extra queue. + */ +static void +ethero(void) +{ + Ether *e; + + e = u->arg; + print("ether%do: %E %I\n", e->ctlrno, e->ifc.ea, e->ifc.ipa); + + for(;;){ + recv(e->ifc.reply, 0); // wait for something to do. + e->transmit(e); + } +} + +Msgbuf* +etheroq(Ether* e) +{ + Msgbuf *m; + Enpkt *p; + Ifc *f; + int len; + + f = &e->ifc; +loop: + if(f->reply->count == 0) + return 0; + m = recv(f->reply, 1); + len = m->count; + if(len > f->maxmtu){ + print("ether%do: pkt too big - %d\n", e->ctlrno, len); + mbfree(m); + goto loop; + } + if(len < ETHERMINTU){ + memset(m->data+len, 0, ETHERMINTU-len); + m->count = len = ETHERMINTU; + } + p = (Enpkt*)m->data; + memmove(p->s, f->ea, sizeof f->ea); + + f->work.count++; + f->rate.count += len; + f->txpkt++; + + return m; +} + +Msgbuf* +etheroq1(Ether* e, int ret) +{ + Msgbuf *m; + Enpkt *p; + Ifc *f; + int len; + + f = &e->ifc; +loop: + if(ret){ + if(f->reply->count == 0) + return 0; + } + m = recv(f->reply, 1); + len = m->count; + if(len > f->maxmtu){ + print("ether%do: pkt too big - %d\n", e->ctlrno, len); + mbfree(m); + goto loop; + } + if(len < ETHERMINTU){ + memset(m->data+len, 0, ETHERMINTU-len); + m->count = len = ETHERMINTU; + } + p = (Enpkt*)m->data; + memmove(p->s, f->ea, sizeof f->ea); + + f->work.count++; + f->rate.count += len; + f->txpkt++; + + return m; +} + +static void +cmd_state(int, char*[]) +{ + int i; + Ifc *ifc; + + for(i = 0; i < nether; i++){ + if(etherif[i].mbps == 0) + continue; + + ifc = ðerif[i].ifc; + print("ether stats %d %E\n", etherif[i].ctlrno, etherif[i].ea); + print(" work =%9W pkts\n", &ifc->work); + print(" rate =%9W Bps\n", 
&ifc->rate);
+		print(" err = %3ld rc %3ld sum\n", ifc->rcverr, ifc->sumerr);
+	}
+}
+
+/*
+ * start every interface that was configured by etherinit and has a
+ * valid ip address: create its reply queue, start its output and
+ * input procs and link it onto enets.  interfaces without a valid
+ * address are disabled by clearing mbps.
+ */
+void
+etherstart(void)
+{
+	int i;
+	Ifc *ifc, *tail;
+	char buf[100], *p;
+
+	nether = 0;
+	tail = 0;
+	for(i = 0; i < MaxEther; i++){
+		if(etherif[i].mbps == 0)
+			continue;
+
+		ifc = &etherif[i].ifc;
+		lock(ifc);
+		getipa(ifc, etherif[i].ctlrno);
+		if(!isvalidip(ifc->ipa)){
+			unlock(ifc);
+			etherif[i].mbps = 0;
+			continue;
+		}
+		if(ifc->reply == 0){
+			dofilter(&ifc->work);
+			dofilter(&ifc->rate);
+			ifc->reply = newqueue(Nqueue);
+		}
+		unlock(ifc);
+
+		sprint(etherif[i].oname, "ether%do", etherif[i].ctlrno);
+		userinit(ethero, etherif+i, etherif[i].oname);
+		sprint(etherif[i].iname, "ether%di", etherif[i].ctlrno);
+		userinit(etheri, etherif+i, etherif[i].iname);
+
+		ifc->next = nil;
+		if(enets != nil)
+			tail->next = ifc;
+		else
+			enets = ifc;
+		tail = ifc;
+		nether++;
+	}
+
+	if(nether){
+		cmd_install("state", "-- ether stats", cmd_state);
+		arpstart();
+		if(p = getconf("route")){
+			snprint(buf, sizeof buf, "route %s", p);
+			cmd_exec(buf);
+		}
+	}
+}
+
+/*
+ * parse six pairs of hex digits, optionally separated by ':', from
+ * from into to.  returns -1 if the string ends early, else 0.
+ */
+static int
+parseether(uchar *to, char *from)
+{
+	char nip[4];
+	char *p;
+	int i;
+
+	p = from;
+	while(*p == ' ')
+		++p;
+	for(i = 0; i < 6; i++){
+		if(*p == 0)
+			return -1;
+		nip[0] = *p++;
+		if(*p == 0)
+			return -1;
+		nip[1] = *p++;
+		nip[2] = 0;
+		to[i] = strtoul(nip, 0, 16);
+		if(*p == ':')
+			p++;
+	}
+	return 0;
+}
+
+/*
+ * returns 0 if the configured port and tbdf in e are unset or
+ * agree with the given device, -1 otherwise.
+ */
+int
+ethercfgmatch(Ether *e, Pcidev *p, uintmem port)
+{
+	if(e->port == 0 || e->port == port)
+	if(e->tbdf == BUSUNKNOWN || p == nil || e->tbdf == p->tbdf){
+		return 0;
+	}
+	return -1;
+}
+
+void
+etherinit(void)
+{
+	char buf[32];
+	int i, n, ctlrno;
+	Ether *e;
+
+	for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		e = etherif+ctlrno;
+		memset(e, 0, sizeof *e);
+		if(!pciconfig("ether", ctlrno, e))
+			continue;
+		for(n = 0; n < netherctlr; n++){
+			if(cistrcmp(etherctlr[n].type, e->type))
+				continue;
+			dprint("FOUND ether %s\n", etherctlr[n].type);
+			e->ctlrno = ctlrno;
+			e->tbdf = BUSUNKNOWN;
+			e->ifc.maxmtu =
ETHERMAXTU; + for(i = 0; i < e->nopt; i++){ + if(strncmp(e->opt[i], "ea=", 3)) + continue; + if(parseether(e->ea, &e->opt[i][3]) == -1) + memset(e->ea, 0, Easize); + } + dprint(" reset ... "); + if(etherctlr[n].reset(e)){ + dprint("fail\n"); + break; + } + dprint("okay\n"); + if(e->irq == 2) + e->irq = 9; + memmove(e->ifc.ea, e->ea, sizeof e->ea); + snprint(buf, sizeof buf, "ether%d", ctlrno); + intrenable(e->irq, e->interrupt, e, e->tbdf, buf); + + print("ether%d: %s: %dMbps port %#p irq %d mtu %d", + ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu); + print(": %E\n", e->ea); + break; + } + } +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,56 @@ +typedef struct Ether Ether; +typedef struct Etherctlr Etherctlr; + +struct Ether { + Pciconf; /* hardware info */ + + int ctlrno; + char iname[NAMELEN]; + char oname[NAMELEN]; + int tbdf; /* type+busno+devno+funcno */ + int mbps; /* Mbps */ + uchar ea[Easize]; + + void (*attach)(Ether*); /* filled in by reset routine */ + void (*transmit)(Ether*); + void (*interrupt)(Ureg*, void*); + void *ctlr; + + Ifc ifc; + + Lock rqlock; + Msgbuf *rqhead; + Msgbuf *rqtail; + Rendez rqr; + + Lock tqlock; + Msgbuf *tqhead; + Msgbuf *tqtail; + Rendez tqr; +}; + +struct Etherctlr{ + char *type; + int (*reset)(Ether*); +}; + +extern Etherctlr etherctlr[]; +extern int netherctlr; +extern Ether etherif[MaxEther]; +extern int nether; + +void etheriq(Ether*, Msgbuf*); +Msgbuf *etheroq(Ether*); +int ethercfgmatch(Ether*, Pcidev*, uintmem); + +int etherga620reset(Ether*); +int ether21140reset(Ether*); +int etherelnk3reset(Ether*); +int etheri82557reset(Ether*); +int igbepnp(Ether*); +int dp83815reset(Ether*); +int dp83820pnp(Ether*); +int rtl8139pnp(Ether*); +int rtl8169pnp(Ether*); +int i82563reset(Ether*); +int m10gpnp(Ether*); --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,122 @@ +#include "../port/portfns.h" + +/* junk 
*/ +void waveprint(char*, ...); +void wave(int); +#pragma varargck argpos waveprint 1 + +void vunmap(void*, usize); +#define machcolor(m) -1 +void acpiinit(int); + +int adec(int*); +int ainc(int*); +void apmmuinit(void); +int archmmu(void); +#define BIOSSEG(a) KADDR(((uint)(a))<<4) +void cgaputc(int); +void cgaputs(char*, int); +void cmd_e820(int, char**); +#define coherence() mfence(); +void cpuid(Cpuidreg*); +void cycles(uvlong*); +void etherinit(void); +void etherstart(void); +void fpuinit(void); +void gdtput(int, u64int, u16int); +char* getconf(char*); +u64int getcr0(void); +u64int getcr2(void); +u64int getcr3(void); +u64int getcr4(void); +void halt(void); +void hardhalt(void); +void i8042a20(void); +void i8042reset(void); +void idle(void); +void idthandlers(void); +void idtput(int, u64int); +int inb(int); +u32int inl(int); +void insb(int, void*, int); +ushort ins(int); +void insl(int, void*, int); +void inss(int, void*, int); +int islo(void); +#define KADDR(pa) kaddr(pa) +void* kaddr(uintmem); +int kbdgetc(void); +void kbdinit(void); +int kbdintr0(void); +void mfence(void); +void microdelay(int); +void mmuinit(void); +uintmem mmuphysaddr(uintptr); +int mmuwalk(PTE*, uintptr, int, PTE**, uintmem (*)(usize)); +void mpsinit(int); +void ndnr(void); +uchar nvramread(int); +void nvramwrite(int, uchar); +void outb(int, int); +void outl(int, u32int); +void outsb(int, void*, int); +void outs(int, u16int); +void outsl(int, void*, int); +void outss(int, void*, int); +#define PADDR(va) paddr(va) +uintmem paddr(void*); +#define perfticks() rdtsc() +void printcpufreq(void); +void putcr3(u64int); +void putcr4(u64int); +void puttr(u64int); +u64int rdmsr(int); +vlong rdtsc(void); +void sipi(void); +Mpl splhi(void); +Mpl spllo(void); +void splx(Mpl); +int tas32(u32int*); +#define tas(l) tas32((u32int*)l) +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +void trput(u64int); +int uartgetc(void); +void uartputc(int); +void uartputs(char*, int); 
+void uartspecial(int, void (*)(int), int (*)(void), int); +void* vmappat(uintmem, usize, uint); +int vmapsync(uintptr); +void* vmap(uintmem, usize); +void vsvminit(int); +void wrmsr(int, vlong); + +#define PTR2UINT(p) ((uintptr)(p)) +#define UINT2PTR(i) ((void*)(i)) + +//#define PCIWADDR(a) PADDR(a) + +/* pata */ +void ideinit(Device*); +Devsize idesize(Device*); +int ideread(Device*, Devsize, void*); +int idewrite(Device*, Devsize, void*); +int idesecsize(Device*); + +/* sata */ +void mvinit(Device*); +Devsize mvsize(Device*); +int mvread(Device*, Devsize, void*); +int mvwrite(Device*, Devsize, void*); + +/* aoe */ +void aoeinit(Device*); +Devsize aoesize(Device*); +int aoeread(Device*, Devsize, void*); +int aoewrite(Device*, Devsize, void*); + +/* iasata */ +void iainit(Device*); +Devsize iasize(Device*); +int iaread(Device*, Devsize, void*); +int iawrite(Device*, Devsize, void*); --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,31 @@ +#include "all.h" +#include "amd64.h" +#include + +void +fpunm(Ureg *ureg, void *) +{ + panic("cpu%d: #NM: %#p", m->machno, ureg->ip); +} + +void +fpumf(Ureg *ureg, void *) +{ + panic("cpu%d: #MF: %#p", m->machno, ureg->ip); +} + +void +fpuxf(Ureg *ureg, void *) +{ + panic("cpu%d: #XF: %#p", m->machno, ureg->ip); +} + +void +fpuinit(void) +{ + if(m->machno == 0){ + trapenable(IdtNM, fpunm, 0, "#NM"); + trapenable(IdtMF, fpumf, 0, "#MF"); + trapenable(IdtXF, fpuxf, 0, "#XF"); + } +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,1689 @@ +/* + * intel/amd ahci sata controller + * copyright © 2007-12 coraid, inc. + */ + +#include "all.h" +#include "io.h" +#include +#include "ahci.h" + +#define dprint(...) if(debug) print(__VA_ARGS__); else USED(debug) +#define idprint(...) if(prid ) print(__VA_ARGS__); else USED(prid) +#define aprint(...) 
if(datapi) print(__VA_ARGS__); else USED(datapi); + +enum { + SDok = 0, + SDeio = -1, + SDretry = -2, + SDcheck = -3, +}; + +enum { + NCtlr = 4, + NCtlrdrv = 32, + NDrive = NCtlr*NCtlrdrv, + + Fahdrs = 4, + + Read = 0, + Write, + + Eesb = 1<<0, /* must have (Eesb & Emtype) == 0 */ + + /* pci space configuration */ + Pmap = 0x90, + Ppcs = 0x91, + + Nms = 256, + Mphywait = 2*1024/Nms - 1, + Midwait = 16*1024/Nms - 1, + Mcomrwait = 64*1024/Nms - 1, +}; + +enum { + Tesb, + Tsb600, + Tjmicron, + Tahci, + Tlast, +}; + +typedef struct Ctlrtype Ctlrtype; +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +struct Ctlrtype { + uint type; + uint maxdmaxfr; + uint flags; + char *name; +}; + +Ctlrtype cttab[Tlast] = { +[Tesb] Tesb, 8192, 0, "63xxesb", +[Tsb600] Tsb600, 256, 0, "sb600", +[Tjmicron] Tjmicron, 8192, 0, "jmicron", +[Tahci] Tahci, 8192, 0, "ahci", +}; + +enum { + Dnull = 0, + Dmissing = 1<<0, + Dnew = 1<<1, + Dready = 1<<2, + Derror = 1<<3, + Dreset = 1<<4, + Doffline = 1<<5, + Dportreset = 1<<6, + Dlast = 8, +}; + +static char *diskstates[Dlast] = { + "null", + "missing", + "new", + "ready", + "error", + "reset", + "offline", + "portreset", +}; + +enum { + DMautoneg, + DMsatai, + DMsataii, + DMsataiii, + DMlast, +}; + +static char *modes[DMlast] = { + "auto", + "satai", + "sataii", + "sataiii", +}; + +struct Drive { + Lock; + + Ctlr *ctlr; + char name[10]; + Aport *port; + Aportm portm; + Aportc portc; /* redundant ptr to port and portm. */ + + uchar drivechange; + uchar state; + + uvlong sectors; + uint secsize; + ulong totick; + ulong lastseen; + uint wait; + uchar mode; + uchar active; + + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + uvlong wwn; + + ushort info[0x200]; + + /* + * ahci allows non-sequential ports. 
+ * to avoid this hassle, we let + * driveno ctlr*NCtlrdrv + unit + * portno nth available port + */ + uint driveno; + uint portno; + + Filter rate[2]; + int fflag; + int init; + ulong reads, writes; +}; + +struct Ctlr { + Lock; + + Ctlrtype *type; + Pcidev *pci; + + uchar *mmio; + u32int *lmmio; + Ahba *hba; + + Drive rawdrive[NCtlrdrv]; + Drive* drive[NCtlrdrv]; + int ndrive; + uint pi; +}; + +static Ctlr iactlr[NCtlr]; +static int niactlr; + +static Drive *iadrive[NDrive]; +static int niadrive; + +static int debug; +static int prid = 1; +static int datapi; + +static char stab[] = { +[0] 'i', 'm', +[8] 't', 'c', 'p', 'e', +[16] 'N', 'I', 'W', 'B', 'D', 'C', 'H', 'S', 'T', 'F', 'X' +}; + +static void +serrstr(u32int r, char *s, char *e) +{ + int i; + + e -= 3; + for(i = 0; i < nelem(stab) && s < e; i++) + if(r & (1<task, p->cmd, p->ci, p->isr); +} + +void +xsleep(Rendez *r, int (*f)(void*), void *a) +{ + if(u) + sleep(r, f, a); + else + for(; !f(a);) + delay(1); +} + +void +xtsleep(Rendez *r, int (*f)(void*), void *a, int ms) +{ + int i; + + if(u) + tsleep(r, f, a, ms); + else + for(i = 0; i < ms; i++){ + if(f(a)) + break; + delay(1); + } +} + +static void +esleep(int ms) +{ + xtsleep(&u->tsleep, no, 0, ms); +} + +typedef struct { + Aport *p; + int i; +} Asleep; + +static int +ahciclear(void *v) +{ + Asleep *s; + + s = v; + return (s->p->ci & s->i) == 0; +} + +static void +aesleep(Aportm *m, Asleep *a, int ms) +{ + xtsleep(m, ahciclear, a, ms); +} + +static int +ahciwait(Aportc *c, int ms) +{ + Aport *p; + Asleep as; + + p = c->p; + p->ci = 1; + as.p = p; + as.i = 1; + aesleep(c->m, &as, ms); + if((p->task & 1) == 0 && p->ci == 0) + return 0; + dreg("ahciwait fail/timeout ", c->p); + return -1; +} + +static void +mkalist(Aportm *m, uint flags, uchar *data, int len) +{ + Actab *t; + Alist *l; + Aprdt *p; + + t = m->ctab; + l = m->list; + l->flags = flags | 0x5; + l->len = 0; + l->ctab = Pciwaddrl(t); + l->ctabhi = Pciwaddrh(t); + if(data){ + l->flags |= 1<<16; + p 
= &t->prdt; + p->dba = Pciwaddrl(data); + p->dbahi = Pciwaddrh(data); + p->count = 1<<31 | len - 2 | 1; + } +} + +static int +nop(Aportc *pc) +{ + uchar *c; + + if((pc->m->feat & Dnop) == 0) + return -1; + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static int +setfeatures(Aportc *pc, uchar f, uint w) +{ + uchar *c; + + c = pc->m->ctab->cfis; + featfis(pc->m, c, f); + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, w); +} + +static int +settxmode(Aportc *pc, uchar f) +{ + uchar *c; + + c = pc->m->ctab->cfis; + if(txmodefis(pc->m, c, f) == -1) + return 0; + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static void +asleep(int ms) +{ + esleep(ms); +} + +static int +ahciportreset(Aportc *c, uint mode) +{ + int i; + u32int *cmd; + Aport *p; + + p = c->p; + cmd = &p->cmd; + *cmd &= ~(Afre|Ast); + for(i = 0; i < 500; i += 25){ + if((*cmd & Acr) == 0) + break; + asleep(25); + } + p->sctl = 3*Aipm | 0*Aspd | Adet; + delay(1); + p->sctl = 3*Aipm | mode*Aspd; + return 0; +} + +static int +ahciidentify0(Aportc *pc, void *id) +{ + uchar *c; + Actab *t; + + t = pc->m->ctab; + c = t->cfis; + memset(id, 0, 0x200); + identifyfis(pc->m, c); + mkalist(pc->m, 0, id, 0x200); + return ahciwait(pc, 3*1000); +} + +static vlong +ahciidentify(Aportc *pc, ushort *id, uint *ss, char *d) +{ + int i, n; + vlong s; + Aportm *m; + + m = pc->m; + for(i = 0;; i++){ + if(i > 5 || ahciidentify0(pc, id) != 0) + return -1; + n = idpuis(id); + if(n & Pspinup && setfeatures(pc, 7, 20*1000) == -1) + print("%s: puis spinup fail\n", d); + if(n & Pidready) + break; + print("%s: puis waiting\n", d); + } + s = idfeat(m, id); + *ss = idss(m, id); + if(s == -1 || (m->feat&Dlba) == 0){ + if((m->feat&Dlba) == 0) + dprint("%s: no lba support\n", d); + return -1; + } + return s; +} + +static int +ahciquiet(Aport *a) +{ + int i; + u32int *p; + + p = &a->cmd; + *p &= ~Ast; + for(i = 0; i < 500; i += 50){ + if((*p & Acr) == 
0) + goto stop; + asleep(50); + } + return -1; +stop: + if((a->task & (ASdrq|ASbsy)) == 0){ + *p |= Ast; + return 0; + } + + *p |= Aclo; + for(i = 0; i < 500; i += 50){ + if((*p & Aclo) == 0) + goto stop1; + asleep(50); + } + return -1; +stop1: + /* extra check */ + dprint("ahci: clo clear %ux\n", a->task); + if(a->task & ASbsy) + return -1; + *p |= Afre | Ast; + return 0; +} + +static int +ahcicomreset(Aportc *pc) +{ + uchar *c; + + dreg("comreset ", pc->p); + if(ahciquiet(pc->p) == -1){ + dprint("ahci: ahciquiet failed\n"); + return -1; + } + dreg("comreset ", pc->p); + + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 1); + mkalist(pc->m, Lclear | Lreset, 0, 0); + if(ahciwait(pc, 500) == -1){ + dprint("ahci: comreset1 failed\n"); + return -1; + } + microdelay(250); + dreg("comreset ", pc->p); + + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + if(ahciwait(pc, 150) == -1){ + dprint("ahci: comreset2 failed\n"); + return -1; + } + dreg("comreset ", pc->p); + return 0; +} + +static int +ahciidle(Aport *port) +{ + int i, r; + u32int *p; + + p = &port->cmd; + if((*p & Arun) == 0) + return 0; + *p &= ~Ast; + r = 0; + for(i = 0; i < 500; i += 25){ + if((*p & Acr) == 0) + goto stop; + asleep(25); + } + r = -1; +stop: + if((*p & Afre) == 0) + return r; + *p &= ~Afre; + for(i = 0; i < 500; i += 25){ + if((*p & Afre) == 0) + return 0; + asleep(25); + } + return -1; +} + +/* + * §6.2.2.1 first part; comreset handled by reset disk. + * - remainder is handled by configdisk. + * - ahcirecover is a quick recovery from a failed command. 
+ */ +static int +ahciswreset(Aportc *pc) +{ + int i; + + i = ahciidle(pc->p); + pc->p->cmd |= Afre; + if(i == -1) + return -1; + if(pc->p->task & (ASdrq|ASbsy)) + return -1; + return 0; +} + +static int +ahcirecover(Aportc *pc) +{ + ahciswreset(pc); + pc->p->cmd |= Ast; + if(settxmode(pc, pc->m->udma) == -1) + return -1; + return 0; +} + +static void* +mallocalign(usize size, usize align, long, usize) +{ + return ialloc(size, align); +} + +static void +setupfis(Afis *f) +{ + f->base = mallocalign(0x100, 0x100, 0, 0); + f->d = f->base + 0; + f->p = f->base + 0x20; + f->r = f->base + 0x40; + f->u = f->base + 0x60; + f->devicebits = (u32int*)(f->base + 0x58); +} + +static void +ahciwakeup(Aportc *c, uint mode) +{ + ushort s; + + s = c->p->sstatus; + if((s & Isleepy) == 0) + return; + if((s & Smask) != Spresent){ + print("ahci: slumbering drive missing %.3ux\n", s); + return; + } + ahciportreset(c, mode); +// print("ahci: wake %.3ux -> %.3lux\n", s, c->p->sstatus); +} + +static int +ahciconfigdrive(Ahba *h, Aportc *c, int mode) +{ + Aportm *m; + Aport *p; + + p = c->p; + m = c->m; + + if(m->list == 0){ + setupfis(&m->fis); + m->list = mallocalign(sizeof *m->list, 1024, 0, 0); + m->ctab = mallocalign(sizeof *m->ctab, 128, 0, 0); + } + + p->list = Pciwaddrl(m->list); + p->listhi = Pciwaddrh(m->list); + p->fis = Pciwaddrl(m->fis.base); + p->fishi = Pciwaddrh(m->fis.base); + + p->cmd |= Afre; + + if((p->sstatus & Sbist) == 0 && (p->cmd & Apwr) != Apwr) + if((p->sstatus & Sphylink) == 0 && h->cap & Hss){ + dprint("ahci: spin up ... [%.3ux]\n", p->sstatus); + p->cmd |= Apwr; + for(int i = 0; i < 1400; i += 50){ + if(p->sstatus & (Sphylink | Sbist)) + break; + asleep(50); + } + } + + p->serror = SerrAll; + + if((p->sstatus & SSmask) == (Isleepy | Spresent)) + ahciwakeup(c, mode); + /* disable power managment sequence from book. 
*/ + p->sctl = 3*Aipm | mode*Aspd | 0*Adet; + p->cmd &= ~Aalpe; + + p->cmd |= Ast; + p->ie = IEM; + + return 0; +} + +static void +setstate(Drive *d, int state) +{ + ilock(d); + d->state = state; + iunlock(d); +} + +static void +ahcienable(Ahba *h) +{ + h->ghc |= Hie; +} + +static void +ahcidisable(Ahba *h) +{ + h->ghc &= ~Hie; +} + +static int +countbits(u32int u) +{ + int i, n; + + n = 0; + for(i = 0; i < 32; i++) + if(u & (1<hba = (Ahba*)c->mmio; + u = h->cap; + + if((u & Ham) == 0) + h->ghc |= Hae; + + print("ahci hba sss %d; ncs %d; coal %d; mports %d; led %d; clo %d; ems %d;\n", + (u>>27) & 1, (u>>8) & 0x1f, (u>>7) & 1, u & 0x1f, (u>>25) & 1, + (u>>24) & 1, (u>>6) & 1); + return countbits(h->pi); +} + +static int +ahcihbareset(Ahba *h) +{ + int wait; + + h->ghc |= Hhr; + for(wait = 0; wait < 1000; wait += 100){ + if(h->ghc == 0) + return 0; + delay(100); + } + return -1; +} + +/* under development */ +static int +ahcibioshandoff(Ahba *h) +{ + int i, wait; + + if((h->cap2 & Boh) == 0) + return 0; + if((h->bios & Bos) == 0) + return 0; + + print("ahcibioshandoff: claim\n"); + h->bios |= Oos; + + wait = 25; + for(i = 0; i < wait; i++){ + delay(1); + if((h->bios & Bos) == 0) + break; + if(i < 25 && h->bios & Bb){ + print("ahcibioshandoff: busy\n"); + wait = 2000; + } + } + if(i == wait){ + print("ahcibioshandoff: timeout %.1ux\n", h->bios); + h->bios = Oos; + } + return 0; +} + +static char* +dstate(uint s) +{ + int i; + + for(i = 0; s; i++) + s >>= 1; + return diskstates[i]; +} + +static char* +tnam(Ctlr *c) +{ + return c->type->name; +} + +static char* +dnam(Drive *d) +{ + char *s; + + s = d->name; + if(d->name[0] == 0) + snprint(d->name, sizeof d->name, "a%d", d->driveno); + return s; +} + +static int +identify(Drive *d) +{ + uchar oserial[21]; + ushort *id; + vlong osectors, s; + + id = d->info; + s = ahciidentify(&d->portc, id, &d->secsize, dnam(d)); + if(s == -1){ + d->state = Derror; + return -1; + } + osectors = d->sectors; + memmove(oserial, d->serial, 
sizeof d->serial); + + d->sectors = s; + + idmove(d->serial, id+10, 20); + idmove(d->firmware, id+23, 8); + idmove(d->model, id+27, 40); + d->wwn = idwwn(d->portc.m, id); + + if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)) + d->drivechange = 1; + + return 0; +} + +static void +clearci(Aport *p) +{ + if(p->cmd & Ast){ + p->cmd &= ~Ast; + p->cmd |= Ast; + } +} + +static int +fmtσ(Fmt *f) +{ + Drive *d; + char buf[8]; + + d = va_arg(f->args, Drive*); + if(d == nil) + snprint(buf, sizeof buf, "s[nil]"); + else + snprint(buf, sizeof buf, "a%d", d->driveno); + return fmtstrcpy(f, buf); +} + +static int +intel(Ctlr *c) +{ + return c->pci->vid == 0x8086; +} + +static int +ignoreahdrs(Drive *d) +{ + return d->portm.feat & Datapi && d->ctlr->type->type == Tsb600; +} + +static void +updatedrive(Drive *d) +{ + u32int f, cause, serr, s0, pr, ewake; + Aport *p; + static u32int last; + + pr = 1; + ewake = 0; + f = 0; + p = d->port; + cause = p->isr; + if(d->ctlr->type->type == Tjmicron) + cause &= ~Aifs; + serr = p->serror; + p->isr = cause; + + if(p->ci == 0){ + f |= Fdone; + pr = 0; + }else if(cause & Adps) + pr = 0; + if(cause & Ifatal){ + ewake = 1; + dprint("%s: fatal\n", dnam(d)); + } + if(cause & Adhrs){ + if(p->task & 33){ + if(ignoreahdrs(d) && serr & ErrE) + f |= Fahdrs; + dprint("%s: Adhrs cause %ux serr %ux task %ux\n", + dnam(d), cause, serr, p->task); + f |= Ferror; + ewake = 1; + } + pr = 0; + } + if(p->task & 1 && last != cause) + dprint("%s: err ca %ux serr %ux task %ux sstat %.3ux\n", + dnam(d), cause, serr, p->task, p->sstatus); + if(pr) + dprint("%s: upd %ux ta %ux\n", dnam(d), cause, p->task); + + if(cause & (Aprcs|Aifs)){ + s0 = d->state; + switch(p->sstatus & Smask){ + case Smissing: + d->state = Dmissing; + break; + case Spresent: + if((p->sstatus & Imask) == Islumber) + d->state = Dnew; + else + d->state = Derror; + break; + case Sphylink: + /* power mgnt crap for suprise removal */ + p->ie |= Aprcs|Apcs; /* is this required? 
*/ + d->state = Dreset; + break; + case Sbist: + d->state = Doffline; + break; + } + dprint("%s: %s → %s [Apcrs] %.3ux\n", dnam(d), dstate(s0), + dstate(d->state), p->sstatus); + if(s0 == Dready && d->state != Dready) + idprint("%s: pulled\n", dnam(d)); + if(d->state != Dready) + f |= Ferror; + if(d->state != Dready || p->ci) + ewake = 1; + } + p->serror = serr; + if(ewake) + clearci(p); + if(f){ + d->portm.flag = f; + wakeup(&d->portm); + } + last = cause; +} + +static void +pstatus(Drive *d, u32int s) +{ + /* + * bogus code because the first interrupt is currently dropped. + * likely my fault. serror is maybe cleared at the wrong time. + */ + if(s) + d->lastseen = Ticks; + switch(s){ + default: + print("%s: pstatus: bad status %.3ux\n", dnam(d), s); + case Smissing: + d->state = Dmissing; + break; + case Spresent: + break; + case Sphylink: + d->wait = 0; + d->state = Dnew; + break; + case Sbist: + d->state = Doffline; + break; + } +} + +static int +configdrive(Drive *d) +{ + if(ahciconfigdrive(d->ctlr->hba, &d->portc, d->mode) == -1) + return -1; + ilock(d); + pstatus(d, d->port->sstatus & Smask); + iunlock(d); + return 0; +} + +static void +resetdisk(Drive *d) +{ + uint state, det, stat; + Aport *p; + + p = d->port; + det = p->sctl & 7; + stat = p->sstatus & Smask; + state = (p->cmd>>28) & 0xf; + dprint("%s: resetdisk: icc %ux det %.3ux sdet %.3ux\n", dnam(d), state, det, stat); + + ilock(d); + state = d->state; + if(d->state != Dready || d->state != Dnew) + d->portm.flag |= Ferror; + clearci(p); /* satisfy sleep condition. */ + wakeup(&d->portm); + d->state = Derror; + iunlock(d); + + if(stat != Sphylink){ + setstate(d, Dportreset); + return; + } + + qlock(&d->portm); + if(p->cmd&Ast && ahciswreset(&d->portc) == -1) + setstate(d, Dportreset); /* get a bigger stick. 
*/ + else{ + setstate(d, Dmissing); + configdrive(d); + } + dprint("%s: resetdisk: %s → %s\n", dnam(d), dstate(state), dstate(d->state)); + qunlock(&d->portm); +} + +static int +newdrive(Drive *d) +{ + char *s; + Aportc *c; + Aportm *m; + + c = &d->portc; + m = &d->portm; + + qlock(c->m); + setfissig(m, c->p->sig); + if(identify(d) == -1){ + dprint("%s: identify failure\n", dnam(d)); + goto lose; + } + if(settxmode(c, m->udma) == -1){ + dprint("%s: can't set udma mode\n", dnam(d)); + goto lose; + } + if(m->feat & Dpower && setfeatures(c, 0x85, 3*1000) == -1){ + m->feat &= ~Dpower; + if(ahcirecover(c) == -1) + goto lose; + } + + setstate(d, Dready); + + qunlock(c->m); + + s = ""; + if(m->feat & Dllba) + s = "L"; + idprint("%s: %sLBA %,lld sectors\n", dnam(d), s, d->sectors); + idprint(" %s %s %s %s\n", d->model, d->firmware, d->serial, + d->drivechange? "[newdrive]": ""); + return 0; + +lose: + idprint("%s: can't be initialized\n", dnam(d)); + setstate(d, Dnull); + qunlock(c->m); + return -1; +} + +static void +hangck(Drive *d) +{ + if((d->portm.feat & Datapi) == 0 && d->active && + d->totick != 0 && (long)(Ticks - d->totick) > 0){ + print("%s: drive hung; resetting [%ux] ci %ux\n", + dnam(d), d->port->task, d->port->ci); + d->state = Dreset; + } +} + +static ushort olds[NCtlr*NCtlrdrv]; + +static int +doportreset(Drive *d) +{ + int i; + + i = -1; + qlock(&d->portm); + if(ahciportreset(&d->portc, d->mode) == -1) + dprint("ahci: ahciportreset fails\n"); + else + i = 0; + qunlock(&d->portm); + dprint("ahci: portreset → %s [task %.4ux ss %.3ux]\n", + dstate(d->state), d->port->task, d->port->sstatus); + return i; +} + +/* drive must be locked */ +static void +statechange(Drive *d) +{ + switch(d->state){ + case Dnull: + case Doffline: + case Dready: + d->wait = 0; + } +} + +static uint +maxmode(Ctlr *c) +{ + return (c->hba->cap & 0xf*Hiss)/Hiss; +} + +static void +checkdrive(Drive *d, int i) +{ + ushort s, sig; + + ilock(d); + s = d->port->sstatus; + if(s) + d->lastseen 
= Ticks; + if(s != olds[i]){ + dprint("%s: status: %.3ux -> %.3ux: %s\n", + dnam(d), olds[i], s, dstate(d->state)); + olds[i] = s; + d->wait = 0; + } + hangck(d); + switch(d->state){ + case Dnull: + case Dready: + break; + case Dmissing: + case Dnew: + switch(s & (Iactive|Smask)){ + case Spresent: + ahciwakeup(&d->portc, d->mode); + case Smissing: + break; + default: + dprint("%s: unknown status %.3ux\n", dnam(d), s); + /* fall through */ + case Iactive: /* active, no device */ + if(++d->wait&Mphywait) + break; +reset: + if(d->mode == 0) + d->mode = maxmode(d->ctlr); + else + d->mode--; + if(d->mode == DMautoneg){ + d->state = Dportreset; + goto portreset; + } + dprint("%s: reset; new mode %s\n", dnam(d), + modes[d->mode]); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Iactive | Sphylink: + if((++d->wait&Midwait) == 0){ + dprint("%s: slow reset %.3ux task=%ux; %d\n", + dnam(d), s, d->port->task, d->wait); + goto reset; + } + s = (uchar)d->port->task; + sig = d->port->sig >> 16; + if(s == 0x7f || s&ASbsy || + (sig != 0xeb14 && (s & ASdrdy) == 0)) + break; + iunlock(d); + newdrive(d); + ilock(d); + break; + } + break; + case Doffline: + if(d->wait++ & Mcomrwait) + break; + /* fallthrough */ + case Derror: + case Dreset: + dprint("%s: reset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Dportreset: +portreset: + if(d->wait++ & 0xff && (s & Iactive) == 0) + break; + dprint("%s: portreset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + d->portm.flag |= Ferror; + clearci(d->port); + wakeup(&d->portm); + if((s & Smask) == 0){ + d->state = Dmissing; + break; + } + iunlock(d); + doportreset(d); + ilock(d); + break; + } + statechange(d); + iunlock(d); +} + +Rendez kprocr; + +static void +satakproc(void) +{ + int i; + + for(;;){ + xtsleep(&kprocr, no, 0, Nms); + for(i = 0; i < niadrive; i++) + checkdrive(iadrive[i], i); + } +} + +static void 
+iainterrupt(Ureg*, void *a) +{ + int i; + u32int cause, m; + Ctlr *c; + Drive *d; + + c = a; + ilock(c); + cause = c->hba->isr; + for(i = 0; cause; i++){ + m = 1 << i; + if((cause & m) == 0) + continue; + cause &= ~m; + d = c->rawdrive + i; + ilock(d); + if(d->port->isr && c->pi & m) + updatedrive(d); + c->hba->isr = m; + iunlock(d); + } + iunlock(c); +} + +/* returns locked list! */ +static Alist* +ahcibuild(Aportm *m, int rw, void *data, uint n, vlong lba) +{ + uchar *c; + uint flags; + Alist *l; + + l = m->list; + c = m->ctab->cfis; + rwfis(m, c, rw, n, lba); + flags = Lpref; + if(rw) + flags |= Lwrite; + mkalist(m, flags, data, 512*n); + return l; +} + +static int +waitready(Drive *d) +{ + ulong s, i, δ; + + for(i = 0; i < 15000; i += 250){ + if(d->state == Dreset || d->state == Dportreset || + d->state == Dnew) + return 1; + δ = Ticks - d->lastseen; + if(d->state == Dnull || δ > 10*1000) + return -1; + ilock(d); + s = d->port->sstatus; + iunlock(d); + if((s & Imask) == 0 && δ > 1500) + return -1; + if(d->state == Dready && (s & Smask) == Sphylink) + return 0; + esleep(250); + } + print("%s: not responding; offline\n", dnam(d)); + ilock(d); + d->state = Doffline; + iunlock(d); + return -1; +} + +static int +lockready(Drive *d) +{ + int i; + + qlock(&d->portm); + while ((i = waitready(d)) == 1) { + qunlock(&d->portm); + esleep(1); + qlock(&d->portm); + } + return i; +} + +static int +io(Drive *d, uint proto, int to, int) +{ + uint task, flag, rv; + Aport *p; + Asleep as; + + switch(waitready(d)){ + case -1: + return SDeio; + case 1: + return SDretry; + } + + ilock(d); + d->portm.flag = 0; + iunlock(d); + p = d->port; + p->ci = 1; + + as.p = p; + as.i = 1; + d->totick = 0; + if(to > 0) + d->totick = Ticks + MS2TK(to) | 1; /* fix fencepost */ + d->active++; + + xsleep(&d->portm, ahciclear, &as); + + d->active--; + ilock(d); + flag = d->portm.flag; + task = d->port->task; + iunlock(d); + + rv = SDok; + if(proto & Ppkt){ + rv = task >> 8 + 4 & 0xf; + flag &= 
~Fahdrs; + flag |= Fdone; + }else if(task & (Efatal<<8) || task & (ASbsy|ASdrq) && d->state == Dready){ + d->port->ci = 0; + ahcirecover(&d->portc); + task = d->port->task; + flag &= ~Fdone; /* either an error or do-over */ + } + if(flag == 0){ + print("%s: retry\n", dnam(d)); + return SDretry; + } + if(flag & (Fahdrs | Ferror)){ + if((task & Eidnf) == 0) + print("%s: i/o error %ux\n", dnam(d), task); + return SDcheck; + } + return rv; +} + +static int +rw(Drive *d, int rw, uchar *a, ulong len, uvlong lba) +{ + int n, try, status, max, count; + uchar *data; + Ctlr *c; + + count = len / d->secsize; + c = d->ctlr; + if(d->portm.feat & Datapi){ + print("%s: no atapi support\n", dnam(d)); + return -1; + } + + max = 128; + if(d->portm.feat & Dllba){ + max = 8192; /* ahci maximum */ + if(c->type->type == Tsb600) + max = 255; /* errata */ + } + data = a; + for(try = 0; try < 10; esleep(50)){ + n = count; + if(n > max) + n = max; + qlock(&d->portm); + ahcibuild(&d->portm, rw, data, n, lba); + status = io(d, Pdma, 5000, 0); + qunlock(&d->portm); + switch(status){ + case SDeio: + return -1; + case SDretry: + try++; + continue; + } + try = 0; + count -= n; + lba += n; + data += n * d->secsize; + if(count == 0) + return data - (uchar*)a; + } + print("%s: bad disk\n", dnam(d)); + return -1; +} + +/* configure drives 0-5 as ahci sata (c.f. 
errata) */ +static int +iaahcimode(Pcidev *p) +{ + uint u; + + u = pcicfgr16(p, 0x92); + dprint("ahci: iaahcimode %.2ux %.4ux\n", pcicfgr8(p, 0x91), u); + pcicfgw16(p, 0x92, u | 0xf); /* ports 0-15 (sic) */ + return 0; +} + +enum{ + Ghc = 0x04/4, /* global host control */ + Pi = 0x0c/4, /* ports implemented */ + Cmddec = 1<<15, /* enable command block decode */ + + /* Ghc bits */ + Ahcien = 1<<31, /* ahci enable */ +}; + +static void +iasetupahci(Ctlr *c) +{ + pcicfgw16(c->pci, 0x40, pcicfgr16(c->pci, 0x40) & ~Cmddec); + pcicfgw16(c->pci, 0x42, pcicfgr16(c->pci, 0x42) & ~Cmddec); + + c->lmmio[Ghc] |= Ahcien; + c->lmmio[Pi] = (1 << 6) - 1; /* six bits: ports 0-5 (supposedly ro pi reg) */ + + /* enable ahci mode; from ich9 datasheet */ + pcicfgw16(c->pci, 0x90, 1<<6 | 1<<5); +} + +static void +sbsetupahci(Pcidev *p) +{ + print("sbsetupahci: tweaking %.4ux ccru %.2ux ccrp %.2ux\n", + p->did, p->ccru, p->ccrp); + pcicfgw8(p, 0x40, pcicfgr8(p, 0x40) | 1); + pcicfgw8(p, PciCCRu, 6); + pcicfgw8(p, PciCCRp, 1); + p->ccru = 6; + p->ccrp = 1; +} + +static ushort itab[] = { + 0xfffc, 0x2680, Tesb, + 0xfffb, 0x27c1, Tahci, /* 82801g[bh]m */ + 0xffff, 0x2821, Tahci, /* 82801h[roh] */ + 0xfffe, 0x2824, Tahci, /* 82801h[b] */ + 0xfeff, 0x2829, Tahci, /* ich8 */ + 0xfffe, 0x2922, Tahci, /* ich9 */ + 0xffff, 0x3a02, Tahci, /* 82801jd/do */ + 0xfefe, 0x3a22, Tahci, /* ich10, pch */ + 0xfff7, 0x3b28, Tahci, /* pchm */ + 0xfffe, 0x3b22, Tahci, /* pch */ +}; + +static int +didtype(Pcidev *p) +{ + int type, i; + + type = Tahci; + switch(p->vid){ + default: + return -1; + case 0x8086: + for(i = 0; i < nelem(itab); i += 3) + if((p->did & itab[i]) == itab[i+1]) + return itab[i+2]; + break; + case 0x1002: + if(p->ccru == 1 || p->ccrp != 1) + if(p->did == 0x4380 || p->did == 0x4390) + sbsetupahci(p); + type = Tsb600; + break; + case 0x1106: + /* + * unconfirmed report that the programming + * interface is set incorrectly. 
+ */ + if(p->did == 0x3349) + return Tahci; + break; + case 0x10de: + case 0x1039: + case 0x1b4b: + case 0x11ab: + break; + case 0x197b: + case 0x10b9: + type = Tjmicron; + break; + } + if(p->ccrb == Pcibcstore && (uchar)p->ccru == 6 && p->ccrp == 1) + return type; + return -1; +} + +static void +iapnp(void) +{ + int i, n, nunit, type; + uintmem io; + Ctlr *c; + Pcidev *p; + Drive *d; + + memset(olds, 0xff, sizeof olds); + p = nil; +loop: + while((p = pcimatch(p, 0, 0)) != nil){ + if((type = didtype(p)) == -1) + continue; + if(p->mem[Abar].bar == 0) + continue; + if(niactlr == NCtlr){ + print("iapnp: %s: too many controllers\n", cttab[type].name); + break; + } + c = iactlr + niactlr; + memset(c, 0, sizeof *c); + io = p->mem[Abar].bar & ~0xfull; + c->mmio = vmap(io, p->mem[Abar].size); + if(c->mmio == nil){ + print("%s: address %#p in use did %.4ux\n", + cttab[type].name, io, p->did); /* c->type is still nil here; tnam(c) would fault */ + continue; + } + c->lmmio = (u32int*)c->mmio; + c->pci = p; + c->type = cttab + type; + pcisetbme(c->pci); + + if(intel(c) && p->did != 0x2681) + iasetupahci(c); + ahcibioshandoff((Ahba*)c->mmio); +// ahcihbareset((Ahba*)c->mmio); + nunit = ahciconf(c); + c->pi = c->hba->pi; + if(0 && p->vid == 0x1002 && p->did == 0x4391){ + c->pi = 0x3f; /* noah's opteron */ + nunit = 6; + } + if(intel(c) && iaahcimode(p) == -1 || nunit < 1){ + vunmap(c->mmio, p->mem[Abar].size); + continue; + } + c->ndrive = nunit; + + i = (c->hba->cap >> 21) & 1; + print("%s: sata-%s with %d ports\n", + tnam(c), "I\0II" + i*2, nunit); + + /* map the drives -- they don't all need to be enabled. 
*/ + memset(c->rawdrive, 0, sizeof c->rawdrive); + n = 0; + for(i = 0; i < NCtlrdrv; i++){ + d = c->rawdrive + i; + d->portno = i; + d->driveno = -1; + d->sectors = 0; + d->serial[0] = ' '; + d->ctlr = c; + if((c->pi & 1<name, sizeof d->name, "a%d", niadrive + n); + d->port = (Aport*)(c->mmio + 0x80*i + 0x100); + d->portc.p = d->port; + d->portc.m = &d->portm; + d->driveno = n++; + c->drive[d->driveno] = d; + iadrive[niadrive + d->driveno] = d; + } + for(i = 0; i < n; i++) + if(ahciidle(c->drive[i]->port) == -1){ + print("%s: port %d wedged; abort\n", + tnam(c), i); + goto loop; + } + for(i = 0; i < n; i++){ + c->drive[i]->mode = DMautoneg; + configdrive(c->drive[i]); + } + + intrenable(p->intl, iainterrupt, c, p->tbdf, "iasata"); + ahcienable(c->hba); + + // do we want to do this here? + for(i = 0; i < n; i++) + checkdrive(c->drive[i], i); + + niadrive += nunit; + niactlr++; + } + userinit(satakproc, 0, "iasata"); +} + +static void +statc(Ctlr *c) +{ + Drive *d; + int j; + + for(j = 0; j < c->ndrive; j++){ + d = c->drive[j]; + if(d->fflag == 0) + continue; + print("%s:\n", dnam(d)); + print(" r\t%W\n", d->rate+Read); + print(" w\t%W\n", d->rate+Write); + print(" r %uld w %uld\n", d->reads, d->writes); + } +} + +static void +cmd_stat(int, char*[]) +{ + int i; + + for(i = 0; i < niactlr; i++) + statc(iactlr+i); +} + +static Drive* +iadev(Device *d) +{ + int i, j; + Drive *dr; + + i = d->wren.ctrl; + j = d->wren.targ; + + for(; i < niactlr; i++){ + if(j < iactlr[i].ndrive){ + dr = iactlr[i].drive[j]; + if(dr->state&Dready) + return dr; + return 0; + } + j -= iactlr[i].ndrive; + } + panic("ia: bad drive %Z\n", d); + return 0; +} + +void +iainit0(void) +{ + fmtinstall(L'σ', fmtσ); + iapnp(); + if(niactlr > 0){ + cmd_install("statr", "-- intel sata stats", cmd_stat); + } +} + +void +iainit(Device *dv) +{ + Drive *d; + vlong s; + char *lba; + static int once; + + if(once++ == 0) + iainit0(); + +top: + d = iadev(dv); + if(d == nil || d->secsize == 0){ + print("\t\t" 
"a%d.%d.%d not ready yet\n", dv->wren.ctrl, dv->wren.targ, dv->wren.lun); + + /* can't waitmsec(); what if no u? */ + for(int i = 0; i < 500; i++) + delay(1); + goto top; + } + + if(d->init++ == 0){ + dofilter(d->rate+Read); + dofilter(d->rate+Write); + } + + s = d->sectors; + lba = ""; + if(d->portm.feat&Dllba) + lba = "L"; + print("\t\t" "%lld sectors/%lld blocks %sLBA\n", s, s/(RBUFSIZE/d->secsize), lba); + d->lastseen = Ticks; /* hack around boot timing */ +} + + +Devsize +iasize(Device *dv) +{ + Drive *d; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 0; + + return d->sectors/(RBUFSIZE/d->secsize); +} + +int +iaread(Device *dv, Devsize b, void *c) +{ + Drive *d; + int rv; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 1; + + rv = rw(d, 0, c, RBUFSIZE, b*(RBUFSIZE/d->secsize)); + if(rv != RBUFSIZE) + return 1; + d->rate[Read].count++; + d->reads++; + d->fflag = 1; + return 0; +} + +int +iawrite(Device *dv, Devsize b, void *c) +{ + Drive *d; + int rv; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 1; + + rv = rw(d, 1, c, RBUFSIZE, b*(RBUFSIZE/d->secsize)); + if(rv != RBUFSIZE) + return 1; + d->rate[Write].count++; + d->writes++; + d->fflag = 1; + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,320 @@ +enum { + VectorNMI = 2, /* non-maskable interrupt */ + VectorBPT = 3, /* breakpoint */ + VectorUD = 6, /* invalid opcode exception */ + VectorCNA = 7, /* coprocessor not available */ + Vector2F = 8, /* double fault */ + VectorCSO = 9, /* coprocessor segment overrun */ + VectorPF = 14, /* page fault */ + Vector15 = 15, /* reserved */ + VectorCERR = 16, /* coprocessor error */ + VectorSIMD = 19, /* SIMD error */ + + VectorPIC = 32, /* external i8259 interrupts */ + IrqCLOCK = 0, + IrqKBD = 1, + IrqUART1 = 3, + IrqUART0 = 4, + IrqPCMCIA = 5, + IrqFLOPPY = 6, + IrqLPT = 7, + IrqIRQ7 = 7, + IrqAUX = 12, /* PS/2 port */ + IrqIRQ13 = 13, /* coprocessor on 386 
*/ + IrqATA0 = 14, + IrqATA1 = 15, + MaxIrqPIC = 15, + + VectorLAPIC = VectorPIC+16, /* local APIC interrupts */ + IrqLINT0 = VectorLAPIC+0, + IrqLINT1 = VectorLAPIC+1, + IrqTIMER = VectorLAPIC+2, + IrqERROR = VectorLAPIC+3, + IrqPCINT = VectorLAPIC+4, + IrqSPURIOUS = VectorLAPIC+15, + MaxIrqLAPIC = VectorLAPIC+15, + + VectorSYSCALL = 64, + + VectorAPIC = 65, /* external APIC interrupts */ + MaxVectorAPIC = 255, +}; + +enum { + IdtPIC = 32, /* external i8259 interrupts */ + + IdtLINT0 = 48, /* local APIC interrupts */ + IdtLINT1 = 49, + IdtTIMER = 50, + IdtERROR = 51, + IdtPCINT = 52, + + IdtIPI = 62, + IdtSPURIOUS = 63, + + IdtSYSCALL = 64, + + IdtIOAPIC = 65, /* external APIC interrupts */ + + IdtMAX = 255, +}; + +typedef struct Vkey Vkey; +typedef struct Vctl Vctl; + +struct Vkey { + int tbdf; /* pci: ioapic or msi sources */ + int irq; /* 8259-emulating sources */ +}; + +struct Vctl { + Vctl* next; /* handlers on this vector */ + + int isintr; /* interrupt or fault/trap */ + int affinity; /* processor affinity (-1 for none) */ + + Vkey; /* source-specific key; tbdf for pci */ + void (*f)(Ureg*, void*); /* handler to call */ + void* a; /* argument to call it with */ + char name[NAMELEN]; /* of driver */ + char *type; + int (*isr)(int); /* get isr bit for this irq */ + int (*eoi)(int); /* eoi */ + int (*mask)(Vkey*, int); /* interrupt enable returns masked vector */ + int vno; /* cpu vector */ +}; + +enum{ + Pat = 1<<16, + Mmx = 1<<23, + Sse2 = 1<<26, +}; + +void* intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +int intraffinity(void*); + +#define NVRAUTHADDR 0 + +enum { + MaxEther = 8, +}; + +enum { + BusCBUS = 0, /* Corollary CBUS */ + BusCBUSII, /* Corollary CBUS II */ + BusEISA, /* Extended ISA */ + BusFUTURE, /* IEEE Futurebus */ + BusINTERN, /* Internal bus */ + BusISA, /* Industry Standard Architecture */ + BusMBI, /* Multibus I */ + BusMBII, /* Multibus II */ + BusMCA, /* Micro Channel Architecture */ + BusMPI, /* MPI */ + BusMPSA, /* MPSA */ 
+ BusNUBUS, /* Apple Macintosh NuBus */ + BusPCI, /* Peripheral Component Interconnect */ + BusPCMCIA, /* PC Memory Card International Association */ + BusTC, /* DEC TurboChannel */ + BusVL, /* VESA Local bus */ + BusVME, /* VMEbus */ + BusXPRESS, /* Express System Bus */ +}; + +#define MKBUS(t,b,d,f) (((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8)) +#define BUSFNO(tbdf) (((tbdf)>>8)&0x07) +#define BUSDNO(tbdf) (((tbdf)>>11)&0x1F) +#define BUSBNO(tbdf) (((tbdf)>>16)&0xFF) +#define BUSTYPE(tbdf) ((tbdf)>>24) +#define BUSBDF(tbdf) ((tbdf)&0x00FFFF00) +#define BUSUNKNOWN (-1) + +/* + * PCI support code. + */ +enum { /* type 0 and type 1 pre-defined header */ + PciVID = 0x00, /* vendor ID */ + PciDID = 0x02, /* device ID */ + PciPCR = 0x04, /* command */ + PciPSR = 0x06, /* status */ + PciRID = 0x08, /* revision ID */ + PciCCRp = 0x09, /* programming interface class code */ + PciCCRu = 0x0A, /* sub-class code */ + PciCCRb = 0x0B, /* base class code */ + PciCLS = 0x0C, /* cache line size */ + PciLTR = 0x0D, /* latency timer */ + PciHDT = 0x0E, /* header type */ + PciBST = 0x0F, /* BIST */ + + PciBAR0 = 0x10, /* base address */ + PciBAR1 = 0x14, + + PciINTL = 0x3C, /* interrupt line */ + PciINTP = 0x3D, /* interrupt pin */ +}; + +/* capabilities */ +enum { + PciCapPMG = 0x01, /* power management */ + PciCapAGP = 0x02, + PciCapVPD = 0x03, /* vital product data */ + PciCapSID = 0x04, /* slot id */ + PciCapMSI = 0x05, + PciCapCHS = 0x06, /* compact pci hot swap */ + PciCapPCIX = 0x07, + PciCapHTC = 0x08, /* hypertransport irq conf */ + PciCapVND = 0x09, /* vendor specific information */ + PciCapPCIe = 0x10, + PciCapMSIX = 0x11, + PciCapSATA = 0x12, + PciCapHSW = 0x0c, /* hot swap */ +}; + +/* ccrb (base class code) values; controller types */ +enum { + Pcibcpci1 = 0, /* pci 1.0; no class codes defined */ + Pcibcstore = 1, /* mass storage */ + Pcibcnet = 2, /* network */ + Pcibcdisp = 3, /* display */ + Pcibcmmedia = 4, /* multimedia */ + Pcibcmem = 5, /* memory 
*/ + Pcibcbridge = 6, /* bridge */ + Pcibccomm = 7, /* simple comms (e.g., serial) */ + Pcibcbasesys = 8, /* base system */ + Pcibcinput = 9, /* input */ + Pcibcdock = 0xa, /* docking stations */ + Pcibcproc = 0xb, /* processors */ + Pcibcserial = 0xc, /* serial bus (e.g., USB) */ + Pcibcwireless = 0xd, /* wireless */ + Pcibcintell = 0xe, /* intelligent i/o */ + Pcibcsatcom = 0xf, /* satellite comms */ + Pcibccrypto = 0x10, /* encryption/decryption */ + Pcibcdacq = 0x11, /* data acquisition & signal proc. */ +}; + +/* ccru (sub-class code) values; common cases only */ +enum { + /* mass storage */ + Pciscscsi = 0, /* SCSI */ + Pciscide = 1, /* IDE (ATA) */ + + /* network */ + Pciscether = 0, /* Ethernet */ + + /* display */ + Pciscvga = 0, /* VGA */ + Pciscxga = 1, /* XGA */ + Pcisc3d = 2, /* 3D */ + + /* bridges */ + Pcischostpci = 0, /* host/pci */ + Pciscpcicpci = 1, /* pci/pci */ + + /* simple comms */ + Pciscserial = 0, /* 16450, etc. */ + Pciscmultiser = 1, /* multiport serial */ + + /* serial bus */ + Pciscusb = 3, /* USB */ +}; + +enum { /* type 0 pre-defined header */ + PciBAR2 = 0x18, + PciBAR3 = 0x1C, + PciBAR4 = 0x20, + PciBAR5 = 0x24, + PciCIS = 0x28, /* cardbus CIS pointer */ + PciSVID = 0x2C, /* subsystem vendor ID */ + PciSID = 0x2E, /* subsystem ID */ + PciEBAR0 = 0x30, /* expansion ROM base address */ + PciMGNT = 0x3E, /* burst period length */ + PciMLT = 0x3F, /* maximum latency between bursts */ +}; + +enum { /* type 1 pre-defined header */ + PciPBN = 0x18, /* primary bus number */ + PciSBN = 0x19, /* secondary bus number */ + PciUBN = 0x1A, /* subordinate bus number */ + PciSLTR = 0x1B, /* secondary latency timer */ + PciIBR = 0x1C, /* I/O base */ + PciILR = 0x1D, /* I/O limit */ + PciSPSR = 0x1E, /* secondary status */ + PciMBR = 0x20, /* memory base */ + PciMLR = 0x22, /* memory limit */ + PciPMBR = 0x24, /* prefetchable memory base */ + PciPMLR = 0x26, /* prefetchable memory limit */ + PciPUBR = 0x28, /* prefetchable base upper 32 bits 
*/ + PciPULR = 0x2C, /* prefetchable limit upper 32 bits */ + PciIUBR = 0x30, /* I/O base upper 16 bits */ + PciIULR = 0x32, /* I/O limit upper 16 bits */ + PciEBAR1 = 0x38, /* expansion ROM base address (0x28 is PciPUBR) */ + PciBCR = 0x3E, /* bridge control register */ +}; + +typedef struct Pcidev Pcidev; +typedef struct Pcidev { + int tbdf; /* type+bus+device+function */ + ushort vid; /* vendor ID */ + ushort did; /* device ID */ + + struct { + uintmem bar; /* base address */ + int size; + } mem[6], rom, ioa, mema; + + uchar rid; + uchar ccrp; + uchar ccrb; + uchar intl; /* interrupt line */ + uchar ccru; + ushort pcr; + uchar cls; + uchar ltr; + + Pcidev* list; + Pcidev* bridge; /* down a bus */ + Pcidev* link; /* next device on this bno */ +} Pcidev; + +typedef struct Pcisiz Pcisiz; +struct Pcisiz{ + Pcidev* dev; + int siz; + int bar; +}; + +int pcicap(Pcidev*, int); +int pcicfgr8(Pcidev*, int); +int pcicfgr16(Pcidev*, int); +int pcicfgr32(Pcidev*, int); +void pcicfgw8(Pcidev*, int, int); +void pcicfgw16(Pcidev*, int, int); +void pcicfgw32(Pcidev*, int, int); +void pciclrmwi(Pcidev*); +void pcihinv(Pcidev*, uint); +Pcidev* pcimatch(Pcidev*, int, int); +Pcidev* pcimatchtbdf(int); +void pcireset(void); +void pcisetbme(Pcidev*); +void pciclrbme(Pcidev*); + +enum { + Npciopt = 10, + Pcioptlen = 32, +}; + +typedef struct Pciconf Pciconf; +struct Pciconf { + char type[NAMELEN]; + uintmem port; + + int irq; + + int nopt; + char opt[Npciopt][Pcioptlen]; +}; + +extern int pciconfig(char*, int, Pciconf*); +#define PCIWINDOW 0 +#define PCIWADDR(va) (PADDR(va)+PCIWINDOW) +#define Pciwaddrl(va) ((u32int)PCIWADDR(va)) +#define Pciwaddrh(va) ((u32int)(PCIWADDR(va)>>32)) --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,549 @@ +#include "all.h" + +#include "apic.h" +#include "io.h" +//#include "adr.h" + +#define DBGFLG 0 +#define DBG(...) 
do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +typedef struct Rbus Rbus; +typedef struct Rdt Rdt; + +struct Rbus { + Rbus *next; + int bustype; + int devno; + Rdt *rdt; +}; + +struct Rdt { + Apic *apic; + int intin; + u32int lo; + + int ref; /* could map to multiple busses */ + int enabled; /* times enabled */ +}; + +enum { /* IOAPIC registers */ + Ioregsel = 0x00, /* indirect register address */ + Iowin = 0x04, /* indirect register data */ + Ioipa = 0x08, /* IRQ Pin Assertion */ + Ioeoi = 0x10, /* EOI */ + + Ioapicid = 0x00, /* Identification */ + Ioapicver = 0x01, /* Version */ + Ioapicarb = 0x02, /* Arbitration */ + Ioabcfg = 0x03, /* Boot Configuration */ + Ioredtbl = 0x10, /* Redirection Table */ +}; + +static Rdt rdtarray[Nrdt]; +static int nrdtarray; +static Rbus* rdtbus[Nbus]; +static Rdt* rdtvecno[IdtMAX+1]; + +static Lock idtnolock; +static int idtno = IdtIOAPIC; + +static Apic xioapic[Napic]; +static int isabusno = -1; + +/* BOTCH: no need for this concept; we've got the bustype */ +static void +ioapicisabus(int busno) +{ + if(isabusno != -1){ + if(busno == isabusno) + return; + print("ioapic: isabus redefined: %d ↛ %d\n", isabusno, busno); +////// return; + } + print("ioapic: isa busno %d\n", busno); + isabusno = busno; +} + +/* + * return the xioapic entry for id, or nil if id is outside + * the array or the entry has not been marked useable. + */ +Apic* +ioapiclookup(uint id) +{ + Apic *a; + + if(id >= nelem(xioapic)) /* id == nelem is already one past the end */ + return nil; + a = xioapic + id; + if(a->useable) + return a; + return nil; +} + +/* + * find the useable ioapic whose range [ibase, ibase+nrdt) + * contains gsi. if intin is not nil, store the input number + * relative to that ioapic's ibase. returns the index into + * xioapic, or -1 (after printing a diagnostic) if none matches. + */ +int +gsitoapicid(int gsi, uint *intin) +{ + int i; + Apic *a; + + for(i=0; i<Napic; i++){ + a = xioapic + i; + if(!a->useable) + continue; + if(gsi >= a->ibase && gsi < a->ibase+a->nrdt){ + if(intin != nil) + *intin = gsi - a->ibase; + return a - xioapic; + } + } + print("gsitoapicid: no ioapic found for gsi %d\n", gsi); + return -1; +} + +static void +rtblget(Apic* apic, int sel, u32int* hi, u32int* lo) +{ + sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + *hi = apic->addr[Iowin]; + apic->addr[Ioregsel] = sel; + *lo = apic->addr[Iowin]; +} + +static void +rtblput(Apic* apic, int sel, u32int hi, u32int lo) +{ + 
sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + apic->addr[Iowin] = hi; + apic->addr[Ioregsel] = sel; + apic->addr[Iowin] = lo; +} + +Rdt* +rdtlookup(Apic *apic, int intin) +{ + int i; + Rdt *r; + + for(i = 0; i < nrdtarray; i++){ + r = rdtarray + i; + if(apic == r->apic && intin == r->intin) + return r; + } + return nil; +} + +void +ioapicintrinit(int bustype, int busno, int apicno, int intin, int devno, u32int lo) +{ + Rbus *rbus; + Rdt *rdt; + Apic *apic; + + if(busno >= Nbus || apicno >= Napic || nrdtarray >= Nrdt) + return; + + if(bustype == BusISA) + ioapicisabus(busno); + + apic = &xioapic[apicno]; + if(!apic->useable || intin >= apic->nrdt) + panic("ioapic: intrinit: usable %d nrdt %d: bus %d apic %d intin %d dev %d lo %.8ux\n", + apic->useable, apic->nrdt, busno, apicno, intin, devno, lo); + + rdt = rdtlookup(apic, intin); + if(rdt == nil){ + if(nrdtarray == nelem(rdtarray)){ + print("ioapic: intrinit: rdtarray too small\n"); + return; + } + rdt = &rdtarray[nrdtarray++]; + rdt->apic = apic; + rdt->intin = intin; + rdt->lo = lo; + }else{ + if(lo != rdt->lo){ + print("mutiple irq botch bus %d %d/%d/%d lo %.8ux vs %.8ux\n", + busno, apicno, intin, devno, lo, rdt->lo); + return; + } + DBG("dup rdt %d %d %d %d %.8ux\n", busno, apicno, intin, devno, lo); + } + rdt->ref++; + rbus = ialloc(sizeof *rbus, 0); + rbus->rdt = rdt; + rbus->bustype = bustype; + rbus->devno = devno; + rbus->next = rdtbus[busno]; + rdtbus[busno] = rbus; +} + +/* + * deal with ioapics at the same physical address. seen on + * certain supermicro atom systems. the hope is that only + * one will be used, and it will be the second one initialized. + * (the pc kernel ignores this issue.) it could be that mp and + * acpi have different numbering? 
+ */ +static Apic* +dupaddr(uintmem pa) +{ + int i; + Apic *p; + + for(i = 0; i < nelem(xioapic); i++){ + p = xioapic + i; + if(p->paddr == pa) + return p; + } + return nil; +} + +Apic* +ioapicinit(int id, int ibase, uintmem pa) +{ + Apic *apic, *p; + static int base; + + /* + * Mark the IOAPIC useable if it has a good ID + * and the registers can be mapped. + */ + if(id >= Napic) + return nil; + if((apic = xioapic+id)->useable) + return apic; + + if((p = dupaddr(pa)) != nil){ + print("ioapic%d: same pa as apic%ld\n", id, p-xioapic); + if(ibase != -1) + return nil; /* mp irqs reference mp apic#s */ + apic->addr = p->addr; + } + else{ +// adrmapck(pa, 1024, Aapic, Mfree); /* not in adr? */ + if((apic->addr = vmap(pa, 1024)) == nil){ + print("ioapic%d: can't vmap %#P\n", id, pa); + return nil; + } + } + apic->useable = 1; + apic->paddr = pa; + + /* + * Initialise the I/O APIC. + * The MultiProcessor Specification says it is the + * responsibility of the O/S to set the APIC ID. + */ + lock(apic); + apic->addr[Ioregsel] = Ioapicver; + apic->nrdt = (apic->addr[Iowin]>>16 & 0xff) + 1; + if(ibase != -1) + apic->ibase = ibase; + else{ + apic->ibase = base; + base += apic->nrdt; + } + apic->addr[Ioregsel] = Ioapicid; + apic->addr[Iowin] = id<<24; + unlock(apic); + + return apic; +} + +static void +·ioapicdump(void) +{ + int i, n; + Rbus *rbus; + Rdt *rdt; + Apic *apic; + u32int hi, lo; + + for(i = 0; i < Napic; i++){ + apic = &xioapic[i]; + if(!apic->useable || apic->addr == 0) + continue; + print("ioapic %d addr %#p nrdt %d ibase %d\n", + i, apic->addr, apic->nrdt, apic->ibase); + for(n = 0; n < apic->nrdt; n++){ + lock(apic); + rtblget(apic, n, &hi, &lo); + unlock(apic); + print(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); + prflush(); + } + } + for(i = 0; i < Nbus; i++){ + if((rbus = rdtbus[i]) == nil) + continue; + print("iointr bus %d:\n", i); + for(; rbus != nil; rbus = rbus->next){ + rdt = rbus->rdt; + print(" apic %ld devno %#ux (%d %d) intin %d lo %#ux ref %d\n", + 
rdt->apic-xioapic, rbus->devno, rbus->devno>>2, + rbus->devno & 0x03, rdt->intin, rdt->lo, rdt->ref); + prflush(); + } + } +} + +void +ioapicdump(void) +{ + if(DBGFLG) + ·ioapicdump(); +} + +void +cmd_ioapicdump(int, char**) +{ + ·ioapicdump(); +} + +void +ioapiconline(void) +{ + int i; + Apic *apic; + + for(apic = xioapic; apic < &xioapic[Napic]; apic++){ + if(!apic->useable || apic->addr == nil) + continue; + for(i = 0; i < apic->nrdt; i++){ + lock(apic); + rtblput(apic, i, 0, Im); + unlock(apic); + } + } + cmd_install("ioapic", "-- ioapic dump", cmd_ioapicdump); +} + +static int +ioapicintrdd(u32int* hi, u32int* lo) +{ + Apic *lapic; + Mach *mach; + static int i; + + /* + * Set delivery mode (lo) and destination field (hi) + * + * Currently, assign each interrupt to a different CPU + * using physical mode delivery. Using the topology + * (packages/cores/threads) could be helpful. + */ + for(;; i = (i+1) % Napic){ + if((lapic = lapiclookup(i)) == nil) + continue; + if((mach = sys->machptr[lapic->machno]) == nil) + continue; + if(mach->online) + break; + } + *hi = i++<<24; + *lo |= Pm|MTf; + return mach->machno; +} + +int +nextvec(void) +{ + uint vecno; + + lock(&idtnolock); + vecno = idtno; + idtno = (idtno+8) % IdtMAX; + if(idtno < IdtIOAPIC) + idtno += IdtIOAPIC; + unlock(&idtnolock); + + return vecno; +} + +static int +msimask(Vkey *v, int mask) +{ + Pcidev *p; + + p = pcimatchtbdf(v->tbdf); + if(p == nil) + return -1; + return pcimsimask(p, mask); +} + +static int +intrenablemsi(Vctl* v, Pcidev *p) +{ + uint vno, lo, hi; + uvlong msivec; + + vno = nextvec(); + + lo = IPlow | TMedge | vno; + v->affinity = ioapicintrdd(&hi, &lo); + + if(lo & Lm) + lo |= MTlp; + + msivec = (uvlong)hi<<32 | lo; + if(pcimsienable(p, msivec) == -1) + return -1; + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vno; + v->type = "msi"; + v->mask = msimask; + + DBG("msiirq: %τ: enabling %.16llux %s irq %d vno %d\n", p->tbdf, msivec, v->name, v->irq, vno); + return vno; +} + +int 
+disablemsi(Vctl*, Pcidev *p) +{ + if(p == nil) + return -1; + return pcimsimask(p, 1); +} + +int +ioapicintrenable(Vctl* v) +{ + Rbus *rbus; + Rdt *rdt; + u32int hi, lo; + int bustype, busno, devno, vecno; + + if(v->tbdf == BUSUNKNOWN){ + if(v->irq >= IrqLINT0 && v->irq <= MaxIrqLAPIC){ + if(v->irq != IrqSPURIOUS) + v->isr = lapiceoi; + v->type = "lapic"; + return v->irq; + } + else{ + /* + * Legacy ISA. + * Make a busno and devno using the + * ISA bus number and the irq. + */ + if(isabusno == -1) + panic("no ISA bus allocated"); + busno = isabusno; + devno = v->irq; + bustype = BusISA; + } + } + else if((bustype = BUSTYPE(v->tbdf)) == BusPCI){ + /* + * PCI. + * Make a devno from BUSDNO(tbdf) and pcidev->intp. + */ + Pcidev *pcidev; + + busno = BUSBNO(v->tbdf); + if((pcidev = pcimatchtbdf(v->tbdf)) == nil) + panic("no PCI dev for tbdf %τ", v->tbdf); + if((vecno = intrenablemsi(v, pcidev)) != -1) + return vecno; + disablemsi(v, pcidev); + if((devno = pcicfgr8(pcidev, PciINTP)) == 0) + panic("no INTP for tbdf %τ", v->tbdf); + devno = BUSDNO(v->tbdf)<<2|(devno-1); + DBG("ioapicintrenable: tbdf %τ busno %d devno %d\n", + v->tbdf, busno, devno); + } + else{ + SET(busno, devno); + panic("unknown tbdf %τ", v->tbdf); + } + + rdt = nil; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno && rbus->bustype == bustype){ + rdt = rbus->rdt; + break; + } + if(rdt == nil){ + /* + * First crack in the smooth exterior of the new code: + * some BIOS make an MPS table where the PCI devices are + * just defaulted to ISA. + * Rewrite this to be cleaner. 
+ */ + if((busno = isabusno) == -1) + return -1; + devno = v->irq<<2; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno){ + rdt = rbus->rdt; + break; + } + DBG("isa: tbdf %τ busno %d devno %d %#p\n", + v->tbdf, busno, devno, rdt); + } + if(rdt == nil) + return -1; + + /* + * Assume this is a low-frequency event so just lock + * the whole IOAPIC to initialise the RDT entry + * rather than putting a Lock in each entry. + */ + lock(rdt->apic); + DBG("%τ: %ld/%d/%d (%d)\n", v->tbdf, rdt->apic - xioapic, rbus->devno, rdt->intin, devno); + if((rdt->lo & 0xff) == 0){ + vecno = nextvec(); + rdt->lo |= vecno; + rdtvecno[vecno] = rdt; + }else + DBG("%τ: mutiple irq bus %d dev %d\n", v->tbdf, busno, devno); + + rdt->enabled++; + lo = (rdt->lo & ~Im); + v->affinity = ioapicintrdd(&hi, &lo); + rtblput(rdt->apic, rdt->intin, hi, lo); + vecno = lo & 0xff; + unlock(rdt->apic); + + DBG("busno %d devno %d hi %#.8ux lo %#.8ux vecno %d\n", + busno, devno, hi, lo, vecno); + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vecno; + v->type = "ioapic"; + + return vecno; +} + +int +ioapicintrdisable(int vecno) +{ + Rdt *rdt; + + /* + * FOV. Oh dear. This isn't very good. + * Fortunately rdtvecno[vecno] is static + * once assigned. + * Must do better. + * + * What about any pending interrupts? 
+ */ + if(vecno < 0 || vecno > MaxVectorAPIC){ + panic("ioapicintrdisable: vecno %d out of range", vecno); + return -1; + } + if((rdt = rdtvecno[vecno]) == nil){ + panic("ioapicintrdisable: vecno %d has no rdt", vecno); + return -1; + } + + lock(rdt->apic); + rdt->enabled--; + if(rdt->enabled == 0) + rtblput(rdt->apic, rdt->intin, 0, rdt->lo); + unlock(rdt->apic); + + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,332 @@ +#include "all.h" +#include "io.h" +#include "ureg.h" + +enum { + Data= 0x60, /* data port */ + + Status= 0x64, /* status port */ + Inready= 0x01, /* input character ready */ + Outbusy= 0x02, /* output busy */ + Sysflag= 0x04, /* system flag */ + Cmddata= 0x08, /* cmd==0, data==1 */ + Inhibit= 0x10, /* keyboard/mouse inhibited */ + Minready= 0x20, /* mouse character ready */ + Rtimeout= 0x40, /* general timeout */ + Parity= 0x80, + + Cmd= 0x64, /* command port (write only) */ + + CTdata= 0x0, /* chips & Technologies ps2 data port */ + CTstatus= 0x1, /* chips & Technologies ps2 status port */ + Enable= 1<<7, + Clear= 1<<6, + Error= 1<<5, + Intenable= 1<<4, + Reset= 1<<3, + Tready= 1<<2, + Rready= 1<<1, + Idle= 1<<0, + + Spec= 0x80, + + PF= Spec|0x20, /* num pad function key */ + View= Spec|0x00, /* view (shift window up) */ + KF= Spec|0x40, /* function key */ + Shift= Spec|0x60, + Break= Spec|0x61, + Ctrl= Spec|0x62, + Latin= Spec|0x63, + Caps= Spec|0x64, + Num= Spec|0x65, + Middle= Spec|0x66, + No= 0x00, /* peter */ + + Home= KF|13, + Up= KF|14, + Pgup= KF|15, + Print= KF|16, + Left= View, + Right= View, + End= '\r', + Down= View, + Pgdown= View, + Ins= KF|20, + Del= 0x7F, + + Rbutton=4, + Mbutton=2, + Lbutton=1, +}; + +uchar kbtab[] = +{ +[0x00] No, 0x1b, '1', '2', '3', '4', '5', '6', +[0x08] '7', '8', '9', '0', '-', '=', '\b', '\t', +[0x10] 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +[0x18] 'o', 'p', '[', ']', '\n', Ctrl, 'a', 's', +[0x20] 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', 
+[0x28] '\'', '`', Shift, '\\', 'z', 'x', 'c', 'v', +[0x30] 'b', 'n', 'm', ',', '.', '/', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, KF|12, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', Del, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +}; + +uchar kbtabshift[] = +{ +[0x00] No, 0x1b, '!', '@', '#', '$', '%', '^', +[0x08] '&', '*', '(', ')', '_', '+', '\b', '\t', +[0x10] 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +[0x18] 'O', 'P', '{', '}', '\n', Ctrl, 'A', 'S', +[0x20] 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +[0x28] '"', '~', Shift, '|', 'Z', 'X', 'C', 'V', +[0x30] 'B', 'N', 'M', '<', '>', '?', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, KF|12, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +}; + +uchar kbtabesc1[] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Latin, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +}; + +static uchar ccc; +static int shift; + +enum +{ + /* controller command byte */ + Cscs1= (1<<6), /* scan code set 1 */ + Cmousedis= (1<<5), /* mouse disable */ + Ckbddis= (1<<4), /* kbd disable */ + Csf= (1<<2), /* system flag */ + Cmouseint= (1<<1), /* mouse interrupt enable */ + Ckbdint= (1<<0), /* kbd interrupt enable */ +}; + +/* + * wait for output no longer busy + */ +static int +outready(void) +{ + int tries; + + for(tries = 0; (inb(Status) & Outbusy); 
tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * wait for input + */ +static int +inready(void) +{ + int tries; + + for(tries = 0; !(inb(Status) & Inready); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * ask 8042 to enable the use of address bit 20 + */ +void +i8042a20(void) +{ + outready(); + outb(Cmd, 0xD1); + outready(); + outb(Data, 0xDF); + outready(); +} + +/* + * ask 8042 to reset the machine + */ +void +i8042reset(void) +{ + ushort *s = (ushort*)(KZERO+0x472); + + *s = 0x1234; /* BIOS warm-boot flag */ + + outready(); + outb(Cmd, 0xFE); /* pulse reset line (means resend on AT&T machines) */ + outready(); +} + +/* + * keyboard processing: read one scan code from the controller + * and translate it. returns the translated character, or -1 + * when there is nothing to deliver (no input ready, escape + * prefix, key release, modifier change, unknown key). + */ +int +kbdintr0(void) +{ + int s, c; + static int esc1, esc2; + static int caps; + static int ctl; + static int num; + int keyup; + + /* + * get status + */ + s = inb(Status); + if(!(s&Inready)) + return -1; + + /* + * get the character + */ + c = inb(Data); + + /* + * e0 is the first byte of a 2 character sequence; + * after e1 the next two codes are skipped + */ + if(c == 0xe0){ + esc1 = 1; + return -1; + } else if(c == 0xe1){ + esc2 = 2; + return -1; + } + + keyup = c&0x80; + c &= 0x7f; + /* + * c indexes the key tables below, so c == sizeof kbtab + * is already out of range. + */ + if(c >= sizeof kbtab){ + print("unknown key %ux\n", c|keyup); + return -1; + } + + if(esc1){ + c = kbtabesc1[c]; + esc1 = 0; + } else if(esc2){ + esc2--; + return -1; + } else if(shift) + c = kbtabshift[c]; + else + c = kbtab[c]; + + if(caps && c<='z' && c>='a') + c += 'A' - 'a'; + + /* + * keyup only important for shifts + */ + if(keyup){ + switch(c){ + case Shift: + shift = 0; + break; + case Ctrl: + ctl = 0; + break; + } + return -1; + } + + /* + * normal character + */ + if(!(c & Spec)){ + if(ctl) + c &= 0x1f; + return c; + } else { + switch(c){ + case Caps: + caps ^= 1; + return -1; + case Num: + num ^= 1; + return -1; + case Shift: + shift = 1; + return -1; + case Ctrl: + ctl = 1; + return -1; + } + } + return -1; +} + +static void +kbdintr(Ureg *ur, void *v) +{ + int c; + + USED(ur, v); + if((c = kbdintr0()) 
>= 0) + kbdchar(c); +} + +int +kbdgetc(void) +{ + int c; + + if((c = kbdintr0()) < 0) + return 0; + return c; +} + +void +kbdinit(void) +{ + int c; + + intrenable(IrqKBD, kbdintr, 0, BUSUNKNOWN, "kbd"); + + /* wait for a quiescent controller */ + while((c = inb(Status)) & (Outbusy | Inready)) + if(c & Inready) + inb(Data); + + /* get current controller command byte */ + outb(Cmd, 0x20); + if(inready() < 0){ + print("kbdinit: can't read ccc\n"); + ccc = 0; + } else + ccc = inb(Data); + + /* enable kbd xfers and interrupts */ + ccc &= ~Ckbddis; + ccc |= Csf | Ckbdint | Cscs1; + if(outready() < 0) + print("kbd init failed\n"); + outb(Cmd, 0x60); + if(outready() < 0) + print("kbd init failed\n"); + outb(Data, ccc); + outready(); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,230 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $32 + +#define pFARJMP32(s, o) BYTE $0xea; /* far jump to ptr32:16 */\ + LONG $o; WORD $s + +/* + * Enter here in 32-bit protected mode. Welcome to 1982. + * Make sure the GDT is set as it should be: + * disable interrupts; + * load the GDT with the table in _gdt32p; + * load all the data segments + * load the code segment via a far jump. 
+ */ +TEXT _protected<>(SB), 1, $-4 + CLI + BYTE $0xe9; LONG $0x4c; /* JMP _endofheader */ + +_startofheader: + BYTE $0x90 /* NOP */ + BYTE $0x90 /* NOP */ + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(2*8-1) + QUAD $_gdt64<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +_endofheader: + MOVL AX, BP /* possible passed-in magic */ + + MOVL $_gdtptr32p<>-KZERO(SB), AX + MOVL (AX), GDTR + + MOVL $SSEL(SiDS, SsTIGDT|SsRPL0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + pFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _warp64<>-KZERO(SB)) + +/* + * Make the basic page tables for CPU0 to map 0-4MiB physical + * to KZERO, and include an identity map for the switch from protected + * to paging mode. There's an assumption here that the creation and later + * removal of the identity map will not interfere with the KZERO mappings; + * the conditions for clearing the identity map are + * clear PML4 entry when (KZERO & 0x0000ff8000000000) != 0; + * clear PDP entry when (KZERO & 0x0000007fc0000000) != 0; + * don't clear PD entry when (KZERO & 0x000000003fe00000) == 0; + * the code below assumes these conditions are met. + * + * Assume a recent processor with Page Size Extensions + * and use two 2MiB entries. 
+ */ +/* + * The layout is decribed in dat.h: + * - MACHSTKSZ stack + * - PTSZ PT for PMAPADDR unused - assumes in KZERO PD + * - PTSZ PD + * - PTSZ PDP + * - PTSZ PML4 + * - 4*KiB vsvmpage for gdt, tss + * - MACHSZ m + * - 4*KiB syspage + * - 4*KiB ptrpage + * - 4*KiB unused + * - 4*KiB unused + * _protected: start of kernel text + */ + +/* + * Macros for accessing page table entries; change the + * C-style array-index macros into a page table byte offset + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _warp64<>(SB), 1, $-4 + MOVL $_protected<>-(MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ+KZERO)(SB), SI + + MOVL SI, DI + XORL AX, AX + MOVL $((MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ)>>2), CX + + CLD + REP; STOSL /* stack, P*, vsvm, m, sys */ + + MOVL SI, AX /* sys-KZERO */ + ADDL $(MACHSTKSZ), AX /* PML4 */ + MOVL AX, CR3 /* load the mmu */ + MOVL AX, DX + ADDL $(PTSZ|PteRW|PteP), DX /* PDP at PML4 + PTSZ */ + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + MOVL DX, PML4O(KZERO)(AX) /* PML4E for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PDP at PML4 + PTSZ */ + ADDL $PTSZ, DX /* PD at PML4 + 2*PTSZ */ + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL DX, PDPO(KZERO)(AX) /* PDPE for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + MOVL $(PtePS|PteRW|PteP), DX + MOVL DX, PDO(0)(AX) /* PDE for identity 0-2MiB */ + + MOVL AX, CX + ADDL $PDO(KZERO), CX +memloop: + MOVL DX, 0(CX) + ADDL $PGLSZ(1), DX + ADDL $8, CX + CMPL DX, $INIMAP + JLT memloop + + MOVL AX, DX /* PD at PML4 + 2*PTSZ */ + ADDL $(PTSZ|PteRW|PteP), DX /* PT at PML4 + 3*PTSZ */ + MOVL DX, PDO(PMAPADDR)(AX) /* PDE for PMAPADDR */ + +/* + * Enable and activate Long Mode. 
From the manual: + * make sure Page Size Extentions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* sys-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX + MOVQ AX, sys(SB) /* sys */ + + ADDQ $(MACHSTKSZ), AX /* PML4 and top of stack */ + MOVQ AX, SP /* set stack */ + +_zap0pml4: + CMPQ DX, $PML4O(KZERO) /* KZERO & 0x0000ff8000000000 */ + JEQ _zap0pdp + MOVQ DX, PML4O(0)(AX) /* zap identity map PML4E */ +_zap0pdp: + ADDQ $PTSZ, AX /* PDP at PML4 + PTSZ */ + CMPQ DX, $PDPO(KZERO) /* KZERO & 0x0000007fc0000000 */ + JEQ _zap0pd + MOVQ DX, PDPO(0)(AX) /* zap identity map PDPE */ +_zap0pd: + ADDQ $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + CMPQ DX, $PDO(KZERO) /* KZERO & 0x000000003fe00000 */ + JEQ _zap0done + MOVQ DX, PDO(0)(AX) /* zap identity map PDE */ +_zap0done: + + ADDQ $(MACHSTKSZ), SI /* PML4-KZERO */ + MOVQ SI, CR3 /* flush TLB */ + + ADDQ $(2*PTSZ+4*KiB), AX /* PD+PT+vsvm */ + MOVQ 
AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + CALL main(SB) + +TEXT ndnr(SB), 1, $-4 /* no deposit, no return */ +_dnr: + STI + HLT + JMP _dnr /* do not resuscitate */ --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,13 @@ +TEXT cpuid(SB), 1, $0 + MOVQ RARG, BP + MOVL 0(BP), AX + MOVL 4(BP), BX + MOVL 8(BP), CX + MOVL 12(BP), DX + CPUID + MOVQ RARG, BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,341 @@ +/* + * Interrupt/exception handling. + */ +#include "amd64l.h" + +MODE $64 + +TEXT _intrp<>(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* idthandlers(SB) PC */ + JMP _intrcommon + +TEXT _intre<>(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_intrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) /* old CS */ + JEQ _intrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + +// SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_intrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL trap(SB) + +TEXT _intrr<>(SB), 1, $-4 /* so ktrace can pop frame */ + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) + JEQ _iretnested + +// SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), RMACH + MOVQ 0(SP), RUSER + +_iretnested: + ADDQ $40, SP + IRETQ + +TEXT idthandlers(SB), 1, $-4 + CALL _intrp<>(SB); BYTE 
$IdtDE /* #DE Divide-by-Zero Error */ + CALL _intrp<>(SB); BYTE $IdtDB /* #DB Debug */ + CALL _intrp<>(SB); BYTE $IdtNMI /* #NMI Borked */ + CALL _intrp<>(SB); BYTE $IdtBP /* #BP Breakpoint */ + CALL _intrp<>(SB); BYTE $IdtOF /* #OF Overflow */ + CALL _intrp<>(SB); BYTE $IdtBR /* #BR Bound-Range */ + CALL _intrp<>(SB); BYTE $IdtUD /* #UD Invalid-Opcode */ + CALL _intrp<>(SB); BYTE $IdtNM /* #NM Device-Not-Available */ + CALL _intre<>(SB); BYTE $IdtDF /* #DF Double-Fault */ + CALL _intrp<>(SB); BYTE $Idt09 /* reserved */ + CALL _intre<>(SB); BYTE $IdtTS /* #TS Invalid-TSS */ + CALL _intre<>(SB); BYTE $IdtNP /* #NP Segment-Not-Present */ + CALL _intre<>(SB); BYTE $IdtSS /* #SS Stack */ + CALL _intre<>(SB); BYTE $IdtGP /* #GP General-Protection */ + CALL _intre<>(SB); BYTE $IdtPF /* #PF Page-Fault */ + CALL _intrp<>(SB); BYTE $Idt0F /* reserved */ + CALL _intrp<>(SB); BYTE $IdtMF /* #MF x87 FPE-Pending */ + CALL _intre<>(SB); BYTE $IdtAC /* #AC Alignment-Check */ + CALL _intrp<>(SB); BYTE $IdtMC /* #MC Machine-Check */ + CALL _intrp<>(SB); BYTE $IdtXF /* #XF SIMD Floating-Point */ + CALL _intrp<>(SB); BYTE $0x14 /* reserved */ + CALL _intrp<>(SB); BYTE $0x15 /* reserved */ + CALL _intrp<>(SB); BYTE $0x16 /* reserved */ + CALL _intrp<>(SB); BYTE $0x17 /* reserved */ + CALL _intrp<>(SB); BYTE $0x18 /* reserved */ + CALL _intrp<>(SB); BYTE $0x19 /* reserved */ + CALL _intrp<>(SB); BYTE $0x1a /* reserved */ + CALL _intrp<>(SB); BYTE $0x1b /* reserved */ + CALL _intrp<>(SB); BYTE $0x1c /* reserved */ + CALL _intrp<>(SB); BYTE $0x1d /* reserved */ + CALL _intrp<>(SB); BYTE $0x1e /* reserved */ + CALL _intrp<>(SB); BYTE $0x1f /* reserved */ + CALL _intrp<>(SB); BYTE $0x20 + CALL _intrp<>(SB); BYTE $0x21 + CALL _intrp<>(SB); BYTE $0x22 + CALL _intrp<>(SB); BYTE $0x23 + CALL _intrp<>(SB); BYTE $0x24 + CALL _intrp<>(SB); BYTE $0x25 + CALL _intrp<>(SB); BYTE $0x26 + CALL _intrp<>(SB); BYTE $0x27 + CALL _intrp<>(SB); BYTE $0x28 + CALL _intrp<>(SB); BYTE $0x29 + CALL _intrp<>(SB); 
BYTE $0x2a + CALL _intrp<>(SB); BYTE $0x2b + CALL _intrp<>(SB); BYTE $0x2c + CALL _intrp<>(SB); BYTE $0x2d + CALL _intrp<>(SB); BYTE $0x2e + CALL _intrp<>(SB); BYTE $0x2f + CALL _intrp<>(SB); BYTE $0x30 + CALL _intrp<>(SB); BYTE $0x31 + CALL _intrp<>(SB); BYTE $0x32 + CALL _intrp<>(SB); BYTE $0x33 + CALL _intrp<>(SB); BYTE $0x34 + CALL _intrp<>(SB); BYTE $0x35 + CALL _intrp<>(SB); BYTE $0x36 + CALL _intrp<>(SB); BYTE $0x37 + CALL _intrp<>(SB); BYTE $0x38 + CALL _intrp<>(SB); BYTE $0x39 + CALL _intrp<>(SB); BYTE $0x3a + CALL _intrp<>(SB); BYTE $0x3b + CALL _intrp<>(SB); BYTE $0x3c + CALL _intrp<>(SB); BYTE $0x3d + CALL _intrp<>(SB); BYTE $0x3e + CALL _intrp<>(SB); BYTE $0x3f + CALL _intrp<>(SB); BYTE $0x40 + CALL _intrp<>(SB); BYTE $0x41 + CALL _intrp<>(SB); BYTE $0x42 + CALL _intrp<>(SB); BYTE $0x43 + CALL _intrp<>(SB); BYTE $0x44 + CALL _intrp<>(SB); BYTE $0x45 + CALL _intrp<>(SB); BYTE $0x46 + CALL _intrp<>(SB); BYTE $0x47 + CALL _intrp<>(SB); BYTE $0x48 + CALL _intrp<>(SB); BYTE $0x49 + CALL _intrp<>(SB); BYTE $0x4a + CALL _intrp<>(SB); BYTE $0x4b + CALL _intrp<>(SB); BYTE $0x4c + CALL _intrp<>(SB); BYTE $0x4d + CALL _intrp<>(SB); BYTE $0x4e + CALL _intrp<>(SB); BYTE $0x4f + CALL _intrp<>(SB); BYTE $0x50 + CALL _intrp<>(SB); BYTE $0x51 + CALL _intrp<>(SB); BYTE $0x52 + CALL _intrp<>(SB); BYTE $0x53 + CALL _intrp<>(SB); BYTE $0x54 + CALL _intrp<>(SB); BYTE $0x55 + CALL _intrp<>(SB); BYTE $0x56 + CALL _intrp<>(SB); BYTE $0x57 + CALL _intrp<>(SB); BYTE $0x58 + CALL _intrp<>(SB); BYTE $0x59 + CALL _intrp<>(SB); BYTE $0x5a + CALL _intrp<>(SB); BYTE $0x5b + CALL _intrp<>(SB); BYTE $0x5c + CALL _intrp<>(SB); BYTE $0x5d + CALL _intrp<>(SB); BYTE $0x5e + CALL _intrp<>(SB); BYTE $0x5f + CALL _intrp<>(SB); BYTE $0x60 + CALL _intrp<>(SB); BYTE $0x61 + CALL _intrp<>(SB); BYTE $0x62 + CALL _intrp<>(SB); BYTE $0x63 + CALL _intrp<>(SB); BYTE $0x64 + CALL _intrp<>(SB); BYTE $0x65 + CALL _intrp<>(SB); BYTE $0x66 + CALL _intrp<>(SB); BYTE $0x67 + CALL _intrp<>(SB); BYTE $0x68 + 
CALL _intrp<>(SB); BYTE $0x69 + CALL _intrp<>(SB); BYTE $0x6a + CALL _intrp<>(SB); BYTE $0x6b + CALL _intrp<>(SB); BYTE $0x6c + CALL _intrp<>(SB); BYTE $0x6d + CALL _intrp<>(SB); BYTE $0x6e + CALL _intrp<>(SB); BYTE $0x6f + CALL _intrp<>(SB); BYTE $0x70 + CALL _intrp<>(SB); BYTE $0x71 + CALL _intrp<>(SB); BYTE $0x72 + CALL _intrp<>(SB); BYTE $0x73 + CALL _intrp<>(SB); BYTE $0x74 + CALL _intrp<>(SB); BYTE $0x75 + CALL _intrp<>(SB); BYTE $0x76 + CALL _intrp<>(SB); BYTE $0x77 + CALL _intrp<>(SB); BYTE $0x78 + CALL _intrp<>(SB); BYTE $0x79 + CALL _intrp<>(SB); BYTE $0x7a + CALL _intrp<>(SB); BYTE $0x7b + CALL _intrp<>(SB); BYTE $0x7c + CALL _intrp<>(SB); BYTE $0x7d + CALL _intrp<>(SB); BYTE $0x7e + CALL _intrp<>(SB); BYTE $0x7f + CALL _intrp<>(SB); BYTE $0x80 + CALL _intrp<>(SB); BYTE $0x81 + CALL _intrp<>(SB); BYTE $0x82 + CALL _intrp<>(SB); BYTE $0x83 + CALL _intrp<>(SB); BYTE $0x84 + CALL _intrp<>(SB); BYTE $0x85 + CALL _intrp<>(SB); BYTE $0x86 + CALL _intrp<>(SB); BYTE $0x87 + CALL _intrp<>(SB); BYTE $0x88 + CALL _intrp<>(SB); BYTE $0x89 + CALL _intrp<>(SB); BYTE $0x8a + CALL _intrp<>(SB); BYTE $0x8b + CALL _intrp<>(SB); BYTE $0x8c + CALL _intrp<>(SB); BYTE $0x8d + CALL _intrp<>(SB); BYTE $0x8e + CALL _intrp<>(SB); BYTE $0x8f + CALL _intrp<>(SB); BYTE $0x90 + CALL _intrp<>(SB); BYTE $0x91 + CALL _intrp<>(SB); BYTE $0x92 + CALL _intrp<>(SB); BYTE $0x93 + CALL _intrp<>(SB); BYTE $0x94 + CALL _intrp<>(SB); BYTE $0x95 + CALL _intrp<>(SB); BYTE $0x96 + CALL _intrp<>(SB); BYTE $0x97 + CALL _intrp<>(SB); BYTE $0x98 + CALL _intrp<>(SB); BYTE $0x99 + CALL _intrp<>(SB); BYTE $0x9a + CALL _intrp<>(SB); BYTE $0x9b + CALL _intrp<>(SB); BYTE $0x9c + CALL _intrp<>(SB); BYTE $0x9d + CALL _intrp<>(SB); BYTE $0x9e + CALL _intrp<>(SB); BYTE $0x9f + CALL _intrp<>(SB); BYTE $0xa0 + CALL _intrp<>(SB); BYTE $0xa1 + CALL _intrp<>(SB); BYTE $0xa2 + CALL _intrp<>(SB); BYTE $0xa3 + CALL _intrp<>(SB); BYTE $0xa4 + CALL _intrp<>(SB); BYTE $0xa5 + CALL _intrp<>(SB); BYTE $0xa6 + CALL 
_intrp<>(SB); BYTE $0xa7 + CALL _intrp<>(SB); BYTE $0xa8 + CALL _intrp<>(SB); BYTE $0xa9 + CALL _intrp<>(SB); BYTE $0xaa + CALL _intrp<>(SB); BYTE $0xab + CALL _intrp<>(SB); BYTE $0xac + CALL _intrp<>(SB); BYTE $0xad + CALL _intrp<>(SB); BYTE $0xae + CALL _intrp<>(SB); BYTE $0xaf + CALL _intrp<>(SB); BYTE $0xb0 + CALL _intrp<>(SB); BYTE $0xb1 + CALL _intrp<>(SB); BYTE $0xb2 + CALL _intrp<>(SB); BYTE $0xb3 + CALL _intrp<>(SB); BYTE $0xb4 + CALL _intrp<>(SB); BYTE $0xb5 + CALL _intrp<>(SB); BYTE $0xb6 + CALL _intrp<>(SB); BYTE $0xb7 + CALL _intrp<>(SB); BYTE $0xb8 + CALL _intrp<>(SB); BYTE $0xb9 + CALL _intrp<>(SB); BYTE $0xba + CALL _intrp<>(SB); BYTE $0xbb + CALL _intrp<>(SB); BYTE $0xbc + CALL _intrp<>(SB); BYTE $0xbd + CALL _intrp<>(SB); BYTE $0xbe + CALL _intrp<>(SB); BYTE $0xbf + CALL _intrp<>(SB); BYTE $0xc0 + CALL _intrp<>(SB); BYTE $0xc1 + CALL _intrp<>(SB); BYTE $0xc2 + CALL _intrp<>(SB); BYTE $0xc3 + CALL _intrp<>(SB); BYTE $0xc4 + CALL _intrp<>(SB); BYTE $0xc5 + CALL _intrp<>(SB); BYTE $0xc6 + CALL _intrp<>(SB); BYTE $0xc7 + CALL _intrp<>(SB); BYTE $0xc8 + CALL _intrp<>(SB); BYTE $0xc9 + CALL _intrp<>(SB); BYTE $0xca + CALL _intrp<>(SB); BYTE $0xcb + CALL _intrp<>(SB); BYTE $0xcc + CALL _intrp<>(SB); BYTE $0xcd /* entries are sequential: this stub sits between 0xcc and 0xce */ + CALL _intrp<>(SB); BYTE $0xce + CALL _intrp<>(SB); BYTE $0xcf + CALL _intrp<>(SB); BYTE $0xd0 + CALL _intrp<>(SB); BYTE $0xd1 + CALL _intrp<>(SB); BYTE $0xd2 + CALL _intrp<>(SB); BYTE $0xd3 + CALL _intrp<>(SB); BYTE $0xd4 + CALL _intrp<>(SB); BYTE $0xd5 + CALL _intrp<>(SB); BYTE $0xd6 + CALL _intrp<>(SB); BYTE $0xd7 + CALL _intrp<>(SB); BYTE $0xd8 + CALL _intrp<>(SB); BYTE $0xd9 + CALL _intrp<>(SB); BYTE $0xda + CALL _intrp<>(SB); BYTE $0xdb + CALL _intrp<>(SB); BYTE $0xdc + CALL _intrp<>(SB); BYTE $0xdd + CALL _intrp<>(SB); BYTE $0xde + CALL _intrp<>(SB); BYTE $0xdf + CALL _intrp<>(SB); BYTE $0xe0 + CALL _intrp<>(SB); BYTE $0xe1 + CALL _intrp<>(SB); BYTE $0xe2 + CALL _intrp<>(SB); BYTE $0xe3 + CALL _intrp<>(SB); BYTE $0xe4 + CALL _intrp<>(SB); 
BYTE $0xe5 + CALL _intrp<>(SB); BYTE $0xe6 + CALL _intrp<>(SB); BYTE $0xe7 + CALL _intrp<>(SB); BYTE $0xe8 + CALL _intrp<>(SB); BYTE $0xe9 + CALL _intrp<>(SB); BYTE $0xea + CALL _intrp<>(SB); BYTE $0xeb + CALL _intrp<>(SB); BYTE $0xec + CALL _intrp<>(SB); BYTE $0xed + CALL _intrp<>(SB); BYTE $0xee + CALL _intrp<>(SB); BYTE $0xef + CALL _intrp<>(SB); BYTE $0xf0 + CALL _intrp<>(SB); BYTE $0xf1 + CALL _intrp<>(SB); BYTE $0xf2 + CALL _intrp<>(SB); BYTE $0xf3 + CALL _intrp<>(SB); BYTE $0xf4 + CALL _intrp<>(SB); BYTE $0xf5 + CALL _intrp<>(SB); BYTE $0xf6 + CALL _intrp<>(SB); BYTE $0xf7 + CALL _intrp<>(SB); BYTE $0xf8 + CALL _intrp<>(SB); BYTE $0xf9 + CALL _intrp<>(SB); BYTE $0xfa + CALL _intrp<>(SB); BYTE $0xfb + CALL _intrp<>(SB); BYTE $0xfc + CALL _intrp<>(SB); BYTE $0xfd + CALL _intrp<>(SB); BYTE $0xfe + CALL _intrp<>(SB); BYTE $0xff --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,235 @@ +/* + * Start-up request IPI handler. + * + * This code is executed on an application processor in response to receiving + * a Start-up IPI (SIPI) from another processor. + * This must be placed on a 4KiB boundary + * somewhere in the 1st MiB of conventional memory. However, + * due to some shortcuts below it's restricted further to within the 1st 64KiB. + * The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KiB; + * CPL and IP set to 0. + */ +#include "mem.h" +#include "amd64l.h" + +/* + * Some machine instructions not handled well by [68][al]. + * This is a messy piece of code, requiring instructions in real mode, + * protected mode (+long mode on amd64). The MODE pseudo-op of 6[al] handles + * the latter two OK, but 'MODE $16' is incomplete, e.g. it does + * not truncate operands appropriately, hence the ugly 'rMOVAX' macro. 
+ * Fortunately, the only other instruction executed in real mode that + * could cause a problem (ORL) is encoded such that it will work OK. + */ +#define DELAY BYTE $0xeb; /* JMP .+2 */ \ + BYTE $0x00 +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s + +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +/* + * Real mode. Welcome to 1978. + * Load a basic GDT, turn on protected mode and make + * inter-segment jump to the protected mode code. + */ +MODE $16 + +TEXT _real<>(SB), 1, $-4 + rFARJMP16(0, _endofheader<>-KZERO(SB)) /* */ + +_startofheader: + NOP; NOP; NOP + QUAD $0xa5a5a5a5a5a5a5a5 + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) /* includes long mode */ + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + QUAD $0x0000800000000000 /* DS */ + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +TEXT _endofheader<>(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $Pe, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rMOVAX (SSEL(SiDS, SsTIGDT|SsRPL0)) /* */ + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + rFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _protected<>-KZERO(SB)) + +/* + * Protected mode. Welcome to 1982. 
+ * Get the local APIC ID from the memory mapped APIC; + * load the PML4 with the shared page table address; + * make an identity map for the inter-segment jump below, + * using the stack space to hold a temporary PDP and PD; + * enable and activate long mode; + * make an inter-segment jump to the long mode code. + */ +MODE $32 + +/* + * Macros for accessing page table entries; must turn + * the C-style array-index macros into a page table byte + * offset. + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _protected<>(SB), 1, $-4 + MOVL $0xfee00000, BP /* apicbase */ + MOVL 0x20(BP), BP /* Id */ + SHRL $24, BP /* becomes RARG later */ +//MOVL $_real<>-KZERO(SB), CX +//MOVL BX, -4(CX) +//_spin: JMP _spin + + MOVL $(0x00100000+MACHSTKSZ), SI /* page table PML4 */ + + MOVL SI, AX + MOVL AX, CR3 /* load the mmu */ + + MOVL AX, DX + SUBL $MACHSTKSZ, DX /* PDP for identity map */ + ADDL $(PteRW|PteP), DX + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + + SUBL $MACHSTKSZ, AX /* PDP for identity map */ + ADDL $PTSZ, DX + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL $(PtePS|PteRW|PteP), DX + ADDL $PTSZ, AX /* PD for identity map */ + MOVL DX, PDO(0)(AX) /* PDE for identity 0-[24]MiB */ + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. 
Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack and call the + * C startup code in m->splpc. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* PML4-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + MOVQ DX, PML4O(0)(AX) /* zap identity map */ + + MOVQ SI, CR3 /* flush TLB */ +#ifndef UseOwnPageTables + /* + * SI still points to the base of the bootstrap + * processor page tables. + * Want to use that for clearing the identity map, + * but want to use the passed-in address for + * setting up the stack and Mach. + */ + MOVQ $_real<>(SB), AX + MOVL -4(AX), SI /* PML4 */ + MOVLQZX SI, SI /* PML4-KZERO */ +#endif + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + ADDQ $(4*PTSZ+4*KiB), AX /* PML4+PDP+PD+PT+vsvm */ + MOVQ AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX RARG, RARG /* APIC ID */ + PUSHQ RARG /* apicno */ + + MOVQ 8(RMACH), AX /* m->splpc */ + CALL* AX /* CALL squidboy(SB) */ + +_ndnr: + JMP _ndnr --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,456 @@ +#include "amd64l.h" + +MODE $64 + +/* + * Port I/O. 
+ */ +TEXT inb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INB + RET + +TEXT insb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSB + RET + +TEXT ins(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INW + RET + +TEXT inss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSW + RET + +TEXT inl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + INL + RET + +TEXT insl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSL + RET + +TEXT outb(SB), 1, $-1 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL byte+8(FP), AX + OUTB + RET + +TEXT outsb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSB + RET + +TEXT outs(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL short+8(FP), AX + OUTW + RET + +TEXT outss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSW + RET + +TEXT outl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL long+8(FP), AX + OUTL + RET + +TEXT outsl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSL + RET + +/* + * Load/store segment descriptor tables: + * GDT - global descriptor table + * IDT - interrupt descriptor table + * TR - task register + * GDTR and LDTR take an m16:m64 argument, + * so shuffle the stack arguments to + * get it in the right format. 
+ */ +TEXT gdtget(SB), 1, $-4 + MOVL GDTR, (RARG) /* Note: 10 bytes returned */ + RET + +TEXT gdtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + + MOVL (RARG), GDTR + + XORQ AX, AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + POPQ AX + MOVWQZX cs+16(FP), BX + PUSHQ BX + PUSHQ AX + RETFQ + +TEXT idtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + MOVL (RARG), IDTR + RET + +TEXT trput(SB), 1, $-4 + MOVW RARG, TASK + RET + +/* + * Read/write various system registers. + */ +TEXT getcr0(SB), 1, $-4 /* Processor Control */ + MOVQ CR0, AX + RET + +TEXT putcr0(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR0 + RET + +TEXT getcr2(SB), 1, $-4 /* #PF Linear Address */ + MOVQ CR2, AX + RET + +TEXT getcr3(SB), 1, $-4 /* PML4 Base */ + MOVQ CR3, AX + RET + +TEXT putcr3(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR3 + RET + +TEXT getcr4(SB), 1, $-4 /* Extensions */ + MOVQ CR4, AX + RET + +TEXT putcr4(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR4 + RET + +TEXT rdtsc(SB), 1, $-4 /* Time Stamp Counter */ + RDTSC + /* u64int rdtsc(void); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT rdmsr(SB), 1, $-4 /* Model-Specific Register */ + MOVL RARG, CX + + RDMSR + /* u64int rdmsr(u32int); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT wrmsr(SB), 1, $-4 + MOVL RARG, CX + MOVL lo+8(FP), AX + MOVL hi+12(FP), DX + + WRMSR + + RET + +TEXT invlpg(SB), 1, $-4 /* INVLPG va+0(FP) */ + MOVQ RARG, va+0(FP) + + INVLPG va+0(FP) + + RET + +TEXT wbinvd(SB), 1, $-4 + WBINVD + RET + +/* + * Serialisation. + */ +TEXT lfence(SB), 1, $-4 + LFENCE + RET + +TEXT mfence(SB), 1, $-4 + MFENCE + RET + +TEXT sfence(SB), 1, $-4 + SFENCE + RET + +/* + * Note: CLI and STI are not serialising instructions. + * Is that assumed anywhere? 
+ */ +TEXT splhi(SB), 1, $-4 +_splhi: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JZ _alreadyhi /* use CMOVLEQ etc. here? */ + + MOVQ (SP), BX + MOVQ BX, 8(RMACH) /* save PC in m->splpc */ + +_alreadyhi: + CLI + RET + +TEXT spllo(SB), 1, $-4 +_spllo: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JNZ _alreadylo /* use CMOVLEQ etc. here? */ + + MOVQ $0, 8(RMACH) /* clear m->splpc */ + +_alreadylo: + STI + RET + +TEXT splx(SB), 1, $-4 + TESTQ $If, RARG /* If - Interrupt Flag */ + JNZ _spllo + JMP _splhi + +TEXT islo(SB), 1, $-4 + PUSHFQ + POPQ AX + ANDQ $If, AX /* If - Interrupt Flag */ + RET + +/* + * Synchronisation + */ +TEXT ainc(SB), 1, $-4 /* int ainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX /* overflow if -ve or 0 */ + JGT _return +_trap: + XORQ BX, BX + MOVQ (BX), BX /* over under sideways down */ +_return: + RET + +TEXT adec(SB), 1, $-4 /* int adec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX /* underflow if -ve */ + JLT _trap + + RET + +/* + * Semaphores rely on negative values for the counter, + * and don't have the same overflow/underflow conditions + * as ainc/adec. + */ +TEXT semainc(SB), 1, $-4 /* int semainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX + RET + +TEXT semadec(SB), 1, $-4 /* int semadec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX + RET + +TEXT tas32(SB), 1, $-4 + MOVL $0xdeaddead, AX + XCHGL AX, (RARG) /* */ + RET + +TEXT fas64(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + +TEXT cas32(SB), 1, $-4 + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +TEXT cas64(SB), 1, $-4 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? 
*/ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +/* + * Label consists of a stack pointer and a programme counter + */ +TEXT gotolabel(SB), 1, $-4 + MOVQ 0(RARG), SP /* restore SP */ + MOVQ 8(RARG), AX /* put return PC on the stack */ + MOVQ AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVQ SP, 0(RARG) /* store SP */ + MOVQ 0(SP), BX /* store return PC */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT hardhalt(SB), 1, $-4 + STI + HLT + RET + +TEXT _monitor(SB), 1, $-4 /* void monitor(void*); */ + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + RET + +TEXT _mwait(SB), 1, $-4 /* void mwait(u32int); */ + MOVLQZX RARG, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + RET + +TEXT k10mwait+0(SB),0,$16 +k10mwloop: + MOVQ RARG, CX + MOVQ val+8(FP), DX + MOVQ (CX), AX + CMPQ AX, DX + JNE k10mwdone + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + MOVQ RARG, CX + MOVQ (CX),AX + MOVQ val+8(FP), DX + CMPQ AX, DX + JNE k10mwdone + XORQ DX, DX + XORQ CX, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + JMP k10mwloop +k10mwdone: + RET , + +TEXT mul64fract(SB), 1, $-4 + MOVQ a+8(FP), AX + MULQ b+16(FP) /* a*b */ + SHRQ $32, AX:DX + MOVQ AX, (RARG) + RET + +#define RDRANDAX BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 +#define RDRAND64AX BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 + +TEXT rdrand32(SB), $-4 +loop32: + RDRANDAX + JCC loop32 + RET + +TEXT rdrand64(SB), $-4 +loop64: + RDRAND64AX + JCC loop64 + RET + +TEXT rdrandbuf(SB), $0 + MOVQ RARG, DX + + MOVLQZX cnt+8(FP), CX + SHRQ $3, CX +eights: + CMPL CX, $0 + JLE f1 + CALL rdrand64(SB) + MOVQ AX, 0(DX) + ADDQ $8, DX + SUBL $1, CX + JMP 
eights + +f1: + MOVLQZX cnt+8(FP), CX + ANDL $7, CX + SHRQ $2, CX +fours: + CMPL CX, $0 + JLE f2 + CALL rdrand32(SB) + MOVL AX, 0(DX) + ADDQ $4, DX + SUBL $1, CX + JMP fours + +f2: + MOVLQZX cnt+8(FP), CX + ANDL $3, CX +ones: + CMPL CX, $0 + JLE f3 + CALL rdrand32(SB) + MOVB AX, 0(DX) + ADDQ $1, DX + SUBL $1, CX + JMP ones + +f3: + RET --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,416 @@ +#include "all.h" + +#include "apic.h" +#include "io.h" +#include "ureg.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +enum { /* Local APIC registers */ + Id = 0x0020, /* Identification */ + Ver = 0x0030, /* Version */ + Tp = 0x0080, /* Task Priority */ + Ap = 0x0090, /* Arbitration Priority */ + Pp = 0x00a0, /* Processor Priority */ + Eoi = 0x00b0, /* EOI */ + Ld = 0x00d0, /* Logical Destination */ + Df = 0x00e0, /* Destination Format */ + Siv = 0x00f0, /* Spurious Interrupt Vector */ + Is = 0x0100, /* Interrupt Status (8) */ + Tmode = 0x0180, /* Trigger Mode (8) */ + Ir = 0x0200, /* Interrupt Request (8) */ + Es = 0x0280, /* Error Status */ + Iclo = 0x0300, /* Interrupt Command */ + Ichi = 0x0310, /* Interrupt Command [63:32] */ + Lvt0 = 0x0320, /* Local Vector Table 0 */ + Lvt5 = 0x0330, /* Local Vector Table 5 */ + Lvt4 = 0x0340, /* Local Vector Table 4 */ + Lvt1 = 0x0350, /* Local Vector Table 1 */ + Lvt2 = 0x0360, /* Local Vector Table 2 */ + Lvt3 = 0x0370, /* Local Vector Table 3 */ + Tic = 0x0380, /* Timer Initial Count */ + Tcc = 0x0390, /* Timer Current Count */ + Tdc = 0x03e0, /* Timer Divide Configuration */ + + Tlvt = Lvt0, /* Timer */ + Lint0 = Lvt1, /* Local Interrupt 0 */ + Lint1 = Lvt2, /* Local Interrupt 1 */ + Elvt = Lvt3, /* Error */ + Pclvt = Lvt4, /* Performance Counter */ + Tslvt = Lvt5, /* Thermal Sensor */ +}; + +enum { /* Siv */ + Swen = 0x00000100, /* Software Enable */ + Fdis = 0x00000200, /* Focus Disable */ +}; + +enum { /* Iclo */ + Lassert = 0x00004000, 
/* Assert level */ + + DSnone = 0x00000000, /* Use Destination Field */ + DSself = 0x00040000, /* Self is only destination */ + DSallinc = 0x00080000, /* All including self */ + DSallexc = 0x000c0000, /* All Excluding self */ +}; + +enum { /* Tlvt */ + Periodic = 0x00020000, /* Periodic Timer Mode */ +}; + +enum { /* Tdc */ + DivX2 = 0x00000000, /* Divide by 2 */ + DivX4 = 0x00000001, /* Divide by 4 */ + DivX8 = 0x00000002, /* Divide by 8 */ + DivX16 = 0x00000003, /* Divide by 16 */ + DivX32 = 0x00000008, /* Divide by 32 */ + DivX64 = 0x00000009, /* Divide by 64 */ + DivX128 = 0x0000000a, /* Divide by 128 */ + DivX1 = 0x0000000b, /* Divide by 1 */ +}; + +static u32int* lapicbase; +static int lapmachno = 1; + +static Apic xlapic[Napic]; + +/* + * Return the xlapic[] entry for id if it is marked useable, + * otherwise nil. + */ +Apic* +lapiclookup(uint id) +{ + Apic *a; + + /* id indexes xlapic[], so id == nelem(xlapic) is already out of range */ + if(id >= nelem(xlapic)) + return nil; + a = xlapic + id; + if(a->useable) + return a; + return nil; +} + +static u32int +lapicrget(int r) +{ + return lapicbase[r/4]; +} + +static void +lapicrput(int r, u32int data) +{ + lapicbase[r/4] = data; +} + +int +lapiceoi(int vecno) +{ + lapicrput(Eoi, 0); + return vecno; +} + +int +lapicisr(int vecno) +{ + int isr; + + isr = lapicrget(Is + (vecno/32)*16); + + return isr & (1<<(vecno%32)); +} + +static char* +lapicprint(char *p, char *e, Lapic *a, int i) +{ + char *s; + + s = "proc"; + p = seprint(p, e, "%-8s ", s); + p = seprint(p, e, "%8ux ", i); +// p = seprint(p, e, "%.8ux ", a->dest); +// p = seprint(p, e, "%.8ux ", a->mask); +// p = seprint(p, e, "%c", a->flags & PcmpBP? 'b': ' '); +// p = seprint(p, e, "%c ", a->flags & PcmpEN? 'e': ' '); +// p = seprint(p, e, "%8ux %8ux", a->lintr[0], a->lintr[1]); + p = seprint(p, e, "%12d\n", a->machno); + return p; +} + +void +lapicinit(int lapicno, uintmem pa, int isbp) +{ + Apic *apic; + + /* + * Mark the LAPIC useable if it has a good ID + * and the registers can be mapped. 
+ * The LAPIC Extended Broadcast and ID bits in the HyperTransport + * Transaction Control register determine whether 4 or 8 bits + * are used for the LAPIC ID. There is also xLAPIC and x2LAPIC + * to be dealt with sometime. + */ + DBG("lapicinit: lapicno %d pa %#P isbp %d caller %#p\n", lapicno, pa, isbp, getcallerpc(&lapicno)); + + if(lapicno >= Napic){ + panic("lapicinit%d: out of range", lapicno); + return; + } + if((apic = &xlapic[lapicno])->useable){ + print("lapicinit%d: already initialised\n", lapicno); + return; + } + if(lapicbase == nil){ +// adrmapck(pa, 1024, Aapic, Mfree); + if((lapicbase = vmap(pa, 1024)) == nil){ + panic("lapicinit%d: can't map lapicbase %#P", lapicno, pa); + return; + } + DBG("lapicinit%d: lapicbase %#P -> %#p\n", lapicno, pa, lapicbase); + } + apic->useable = 1; + + /* + * Assign a machno to the processor associated with this + * LAPIC, it may not be an identity map. + * Machno 0 is always the bootstrap processor. + */ + if(isbp){ + apic->machno = 0; + m->apicno = lapicno; + } + else + apic->machno = lapmachno++; +} + +static void +lapicdump0(Apic *apic, int i) +{ + if(!apic->useable || apic->addr != 0) + return; + DBG("lapic%d: machno %d lint0 %#8.8ux lint1 %#8.8ux\n", + i, apic->machno, apic->lvt[0], apic->lvt[1]); + DBG(" tslvt %#8.8ux pclvt %#8.8ux elvt %#8.8ux\n", + lapicrget(Tslvt), lapicrget(Pclvt), lapicrget(Elvt)); + DBG(" tlvt %#8.8ux lint0 %#8.8ux lint1 %#8.8ux siv %#8.8ux\n", + lapicrget(Tlvt), lapicrget(Lint0), + lapicrget(Lint1), lapicrget(Siv)); +} + +void +lapicdump(void) +{ + int i; + + if(!DBGFLG) + return; + + DBG("lapicbase %#p lapmachno %d\n", lapicbase, lapmachno); + for(i = 0; i < Napic; i++) + lapicdump0(xlapic + i, i); +} + +static void +apictimer(Ureg* ureg, void*) +{ + clock(0, ureg->ip); +} + +int +lapiconline(void) +{ + Apic *apic; + u64int tsc; + u32int dfr, ver; + int apicno, nlvt; + + if(lapicbase == nil) + panic("lapiconline: no lapic base"); + + if((apicno = ((lapicrget(Id)>>24) & 0xff)) >= Napic) 
+ panic("lapic: id too large %d", apicno); + if(apicno != m->apicno){ + panic("lapic: %d != %d", m->apicno, apicno); + dfr = lapicrget(Id) & ~(0xff<<24); + dfr |= m->apicno<<24; + lapicrput(Id, dfr); + apicno = m->apicno; + } + apic = &xlapic[apicno]; + if(!apic->useable || apic->addr != nil) + panic("lapiconline: lapic%d: useable %d addr %#p", + apicno, apic->useable, apic->addr); + + /* + * Things that can only be done when on the processor + * owning the APIC, apicinit above runs on the bootstrap + * processor. + */ + ver = lapicrget(Ver); + nlvt = ((ver>>16) & 0xff) + 1; + if(nlvt > nelem(apic->lvt)){ + print("lapiconline%d: nlvt %d > max (%d)\n", + apicno, nlvt, nelem(apic->lvt)); + nlvt = nelem(apic->lvt); + } + apic->nlvt = nlvt; + apic->ver = ver & 0xff; + + /* + * These don't really matter in Physical mode; + * set the defaults anyway. + */ +// if(memcmp(m->cpuinfo, "AuthenticAMD", 12) == 0) +// dfr = 0xf0000000; +// else + dfr = 0xffffffff; + lapicrput(Df, dfr); + lapicrput(Ld, 0x00000000); + + /* + * Disable interrupts until ready by setting the Task Priority + * register to 0xff. + */ + lapicrput(Tp, 0xff); + + /* + * Software-enable the APIC in the Spurious Interrupt Vector + * register and set the vector number. The vector number must have + * bits 3-0 0x0f unless the Extended Spurious Vector Enable bit + * is set in the HyperTransport Transaction Control register. + */ + lapicrput(Siv, Swen|IdtSPURIOUS); + + /* + * Acknowledge any outstanding interrupts. + */ + lapicrput(Eoi, 0); + + /* + * Use the TSC to determine the lapic timer frequency. + * It might be possible to snarf this from a chipset + * register instead. 
+ */ + lapicrput(Tdc, DivX1); + lapicrput(Tlvt, Im); + tsc = rdtsc() + m->cpuhz/10; + lapicrput(Tic, 0xffffffff); + + while(rdtsc() < tsc) + ; + + apic->hz = (0xffffffff-lapicrget(Tcc))*10; + apic->max = apic->hz/HZ; + apic->min = apic->hz/(100*HZ); + apic->div = ((m->cpuhz/apic->max)+HZ/2)/HZ; + + if(m->machno == 0 || DBGFLG){ + print("lapic%d: hz %lld max %lld min %lld div %lld\n", apicno, + apic->hz, apic->max, apic->min, apic->div); + } + + /* + * Mask interrupts on Performance Counter overflow and + * Thermal Sensor if implemented, and on Lintr0 (Legacy INTR), + * and Lintr1 (Legacy NMI). + * Clear any Error Status (write followed by read) and enable + * the Error interrupt. + */ + switch(apic->nlvt){ + case 7: + case 6: + lapicrput(Tslvt, Im); + /*FALLTHROUGH*/ + case 5: + lapicrput(Pclvt, Im); + /*FALLTHROUGH*/ + default: + break; + } + lapicrput(Lint1, apic->lvt[1]|Im|IdtLINT1); + lapicrput(Lint0, apic->lvt[0]|Im|IdtLINT0); + + lapicrput(Es, 0); + lapicrget(Es); + lapicrput(Elvt, IdtERROR); + + /* + * Reload the timer to de-synchronise the processors, + * then lower the task priority to allow interrupts to be + * accepted by the APIC. + */ + microdelay((TK2MS(1)*1000/lapmachno) * m->machno); + lapicrput(Tic, apic->max); + + if(apic->machno == 0) + intrenable(IdtTIMER, apictimer, 0, -1, "APIC timer"); + lapicrput(Tlvt, Periodic|IrqTIMER); + if(m->machno == 0) + lapicrput(Tp, 0); + return 1; +} + +void +lapictimerset(uvlong next) +{ + Mpl pl; + Apic *apic; + vlong period; + + apic = &xlapic[(lapicrget(Id)>>24) & 0xff]; + + pl = splhi(); + lock(&m->apictimerlock); + + period = apic->max; + if(next != 0){ + period = next - rdtsc(); + period /= apic->div; + + if(period < apic->min) + period = apic->min; + else if(period > apic->max - apic->min) + period = apic->max; + } + lapicrput(Tic, period); + + unlock(&m->apictimerlock); + splx(pl); +} + +void +lapicsipi(int lapicno, uintmem pa) +{ + int i; + u32int crhi, crlo; + + /* + * SIPI - Start-up IPI. 
+ * To do: checks on lapic validity. + */ + crhi = lapicno<<24; + lapicrput(Ichi, crhi); + lapicrput(Iclo, DSnone|TMlevel|Lassert|MTir); + microdelay(200); + lapicrput(Iclo, DSnone|TMlevel|MTir); + delay(10); + + crlo = DSnone|TMedge|MTsipi|((u32int)pa/(4*KiB)); + for(i = 0; i < 2; i++){ + lapicrput(Ichi, crhi); + lapicrput(Iclo, crlo); + microdelay(200); + } +} + +void +lapicipi(int lapicno) +{ + lapicrput(Ichi, lapicno<<24); + lapicrput(Iclo, DSnone|TMedge|Lassert|MTf|IdtIPI); + while(lapicrget(Iclo) & Ds) + ; +} + +void +lapicpri(int pri) +{ + lapicrput(Tp, pri); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,353 @@ + #include "all.h" + +#define deccnt(x) if(x) adec((int*)&x->nlock) +#define inccnt(x) if(x) ainc((int*)&x->nlock) + +void +printlocks(User *up) +{ + int i; + + for(i = 0; i < up->nlock; i++){ + print("%#p:%#p", up->lstack[i], up->pstack[i]); + if((i%4) == 0) + print("\n"); + } + if(i>0 && i%4) + print("\n"); +} + +void +lock(Lock *l) +{ + int i, nl; + uintptr pc; + + pc = getcallerpc(&l); + nl = 0; + if(u) + nl = u->nlock; +loop: + inccnt(u); /* prevent being scheded */ + if(tas(l) == 0) { + l->pc = pc; + if(u){ + u->lstack[nl] = l; + u->pstack[nl] = pc; + } + return; + } + deccnt(u); + + for(i = 0; i < 1000000; i++){ + if(l->sbsem) + continue; + inccnt(u); + if(tas(l) == 0) { + l->pc = pc; + if(u){ + u->lstack[nl] = l; + u->pstack[nl] = pc; + } + return; + } + deccnt(u); + } + l->sbsem = 0; // BOTCH + + print("lock loop %d:%#p called by %#p held by pc %#p\n", u?u->pid:-1, l, pc, l->pc); + if(u) + dumpstack(u); + dotrace(0); + if(islo()) + sched(); + else + print("ilock deadlock\n"); + goto loop; +} + +void +unlock(Lock *l) +{ + if(l->sbsem == 0) + print("unlock: not locked: pc %#p\n", getcallerpc(&l)); + l->pc = 0; + l->sbsem = 0; + coherence(); + + if(u && adec((int*)&u->nlock) == 0) + if(u->delaysched) + if(islo()){ + /* + * Call sched if the need arose while locks were held + * But, 
don't do it from interrupt routines, hence the islo() test + */ + u->delaysched = 0; + sched(); + } +} + +int +canlock(Lock *l) +{ + inccnt(u); + if(tas(l)){ + deccnt(u); + return 0; + } + l->pc = getcallerpc(&l); + return 1; +} + +void +ilock(Lock *l) +{ + uintptr pc; + Mpl x; + + pc = getcallerpc(&l); + + x = splhi(); + if(tas(l) == 0) + goto acquire; + + if(!l->isilock) + panic("ilock: not ilock %#p", pc); + if(l->m == MACHP(m->machno)) + panic("ilock: deadlock cpu%d pc %#p lpc %#p\n", m->machno, pc, l->pc); + for(;;){ + splx(x); + while(l->sbsem) + ; + x = splhi(); + if(tas(l) == 0) + goto acquire; + } +acquire: +// m->ilockdepth++; +// if(u) +// u->lastilock = l; + l->sr = x; + l->pc = pc; + l->p = u; + l->isilock = 1; + l->m = MACHP(m->machno); +} + +void +iunlock(Lock *l) +{ + Mpl sr; + + if(l->sbsem == 0) + panic("iunlock nolock: pc %#p", getcallerpc(&l)); + if(l->isilock == 0) + print("iunlock lock: pc %#p held by %#p\n", getcallerpc(&l), l->pc); + if(islo()) + print("iunlock lo: %#p held by %#p\n", getcallerpc(&l), l->pc); + + sr = l->sr; + l->m = 0; + l->sbsem = 0; +// m->ilockdepth--; + + coherence(); + +// if(u) +// u->lastilock = 0; + splx(sr); +} + +void +qlock(QLock *q) +{ + User *p; + int i; + + lock(q); + if(!q->locked){ + q->locked = 1; + unlock(q); + goto out; + } + if(u) { + /* report if q is already recorded in u->has */ + for(i=0; i<NHAS; i++) + if(u->has.q[i] == q) { + print("circular qlock by %d at %#p (other %#p, %#p\n", + u->pid, getcallerpc(&q), u->has.pc[i], q->pc); + dumpstack(u); + break; + } + } + p = q->tail; + if(p == 0) + q->head = u; + else + p->qnext = u; + q->tail = u; + u->qnext = 0; + u->state = Queueing; + u->has.want = q; + unlock(q); + sched(); + u->has.want = 0; + +out: + if(u) { + /* record q and the caller's pc in the first free u->has slot */ + for(i=0; i<NHAS; i++) + if(u->has.q[i] == 0) { + u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&q); + return; + } + print("NHAS(%d) too small\n", NHAS); + } +} + +int +canqlock(QLock *q) +{ + int i; + + lock(q); + if(q->locked){ + unlock(q); + return 0; + } + q->locked = 1; + unlock(q); + + if(u){ + for(i=0; i<NHAS; i++) + if(u->has.q[i] == 0) { + 
u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&q); + return 1; + } + print("NHAS(%d) too small\n", NHAS); + } + return 1; +} + +void +qunlock(QLock *q) +{ + User *p; + int i; + + lock(q); + p = q->head; + if(p) { + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + unlock(q); + ready(p); + } else { + q->locked = 0; + unlock(q); + } + + if(u){ + for(i=0; ihas.q[i] == q) { + u->has.q[i] = 0; + return; + } + panic("qunlock: not there %#p, called from %#p\n", + q, getcallerpc(&q)); + } +} + +/* + * readers/writers lock + * allows 1 writer or many readers + */ +void +rlock(RWlock *l) +{ + QLock *q; + + qlock(&l->wr); /* wait here for writers and exclusion */ + + q = &l->rd; /* first reader in, qlock(&l->rd) */ + lock(q); + q->locked = 1; + l->nread++; + unlock(q); + + qunlock(&l->wr); + + if(u){ + int i; + int found; + + found = 0; + for(i=0; ihas.q[i] == q){ + print("circular rlock by %d at %#p (other %#p)\n", + u->pid, getcallerpc(&l), u->has.pc[i]); + dumpstack(u); + } + if(!found && u->has.q[i] == 0) { + u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&l); + found = 1; + } + } + if(!found) + print("NHAS(%d) too small\n", NHAS); + } +} + +void +runlock(RWlock *l) +{ + QLock *q; + User *p; + int n; + + q = &l->rd; + lock(q); + n = l->nread - 1; + l->nread = n; + if(n == 0) { /* last reader out, qunlock(&l->rd) */ + p = q->head; + if(p) { + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + unlock(q); + ready(p); + goto accounting; + } + q->locked = 0; + } + unlock(q); + +accounting: + if(u){ + int i; + for(i=0; ihas.q[i] == q) { + u->has.q[i] = 0; + return; + } + panic("runlock: not there %#p, called from %#p\n", + q, getcallerpc(&l)); + } +} + +void +wlock(RWlock *l) +{ + qlock(&l->wr); /* wait here for writers and exclusion */ + qlock(&l->rd); /* wait here for last reader */ +} + +void +wunlock(RWlock *l) +{ + qunlock(&l->rd); + qunlock(&l->wr); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,195 @@ 
+#include "all.h"
+#include "io.h"
+
+uint	niob;		/* number of io buffers, set by iobufinit */
+uint	nhiob;		/* number of hash buckets, set by iobufinit */
+Hiob	*hiob;		/* the hash buckets */
+
+/*
+ * this ugliness may be fixed by moving INIMAP to KSEG2
+ */
+
+/*
+ * physical to kernel virtual address: below INIMAP the
+ * result is KZERO-relative, below KSEG2 it is KSEG2-relative;
+ * anything else panics.
+ */
+void*
+kaddr(uintmem pa)
+{
+	uchar *a;
+
+	a = UINT2PTR(pa);
+	if(pa < INIMAP)
+		return a+KZERO;
+	if(pa < KSEG2)
+		return a+KSEG2;
+	panic("kaddr: bad pa %#P\n", pa);
+	return 0;
+}
+
+/*
+ * inverse of kaddr: kernel virtual to physical address.
+ * panics on an address in neither window.
+ */
+uintmem
+paddr(void *va)
+{
+	uintmem a;
+
+	a = PTR2UINT(va);
+	if(a >= KZERO && a < KZERO+INIMAP)
+		return a-KZERO;
+	if(a >= KSEG2)
+		return a-KSEG2;
+	panic("paddr: va %#p pa %#P @ %#p", va, mmuphysaddr(a), getcallerpc(&va));
+	return 0;
+}
+
+/*
+ * print the base and limit of every bank in mconf.
+ */
+void
+prbanks(void)
+{
+	Mbank *b;
+	int m;
+
+	for(m = 0; m < mconf.nbank; m++){
+		b = mconf.bank+m;
+		print("bank[%d]: base %#p, limit %#p\n", m, b->base, b->limit);
+	}
+}
+
+
+/*
+ * Called to allocate permanent data structures
+ * Alignment is in number of bytes. It pertains both to the start and
+ * end of the allocated memory.
+ *
+ * The first bank with room is used and its base advanced; the
+ * memory is returned zeroed.  An align <= 0 means sizeof(uintptr).
+ * Panics (after printing the banks) when no bank can satisfy
+ * the request.
+ */
+void*
+ialloc(uintptr n, int align)
+{
+	Mbank *b;
+	uintmem p;
+	int m;
+
+	ilock(&mconf);
+	for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++){
+		p = b->base;
+
+		if(align <= 0)
+			align = sizeof(uintptr);
+		if(m = n % align)	/* round the size up */
+			n += align - m;
+		if(m = p % align)	/* round the start up */
+			p += align - m;
+
+		if(p+n > b->limit)
+			continue;
+
+		b->base = p+n;
+		iunlock(&mconf);
+
+		/* this is really ugly */
+		if(b == mconf.bank){
+			/* bank 0 is addressed through KZERO, the others through KSEG2 */
+			memset((void*)(p+KZERO), 0, n);
+			return (void*)(p+KZERO);
+		}
+		memset((void*)(p+KSEG2), 0, n);
+		return (void*)(p+KSEG2);
+	}
+
+	iunlock(&mconf);
+
+	prbanks();
+	panic("ialloc(%p, %d): out of memory: %#p nbank=%d\n", n, align, getcallerpc(&n), mconf.nbank);
+	return 0;
+}
+
+/*
+ * console command "memory": print the memory banks.
+ */
+static void
+cmd_memory(int, char *[])
+{
+	prbanks();
+}
+
+/*
+ * allocate rest of mem
+ * for io buffers.
+ *
+ * sizes the buffer pool from what is left in the banks less
+ * conf.sparemem, allocates nhiob hash buckets (the first prime
+ * at or above niob/HWIDTH) and niob buffers, and links the buffers
+ * onto the buckets' circular lists, walking the buckets
+ * from last to first.
+ */
+#define	HWIDTH	8	/* buffers per hash */
+void
+iobufinit(void)
+{
+	long i;
+	uintptr m, v;
+	Iobuf *p, *q;
+	Hiob *hp;
+	Mbank *b;
+
+	wlock(&mainlock);	/* init */
+	wunlock(&mainlock);
+
+	m = 0;
+	for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++)
+		m += b->limit - b->base;
+
+	m -= conf.sparemem;
+
+	niob = m / (sizeof(Iobuf) + RBUFSIZE + sizeof(Hiob)/HWIDTH);
+	nhiob = niob / HWIDTH;
+	while(!prime(nhiob))
+		nhiob++;
+	print(" %d buffers; %d hashes\n", niob, nhiob);
+	hiob = ialloc(nhiob * sizeof(Hiob), 0);
+	hp = hiob;
+	for(i=0; i<nhiob; i++) {
+		/* no newline in the name; compare the "buf%uld" names below */
+		snprint(hp->namebuf, sizeof hp->namebuf, "hiob%uld", i);
+		hp->name = hp->namebuf;
+		qlock(hp);
+		qunlock(hp);
+		hp++;
+	}
+	p = ialloc(niob * sizeof(Iobuf), 0);
+	hp = hiob;
+	for(i=0; i<niob; i++) {
+		p->name = "buf";
+		snprint(p->namebuf, sizeof p->namebuf, "buf%uld", i);
+		p->name = p->namebuf;
+		qlock(p);
+		qunlock(p);
+		/* step backwards through the buckets, wrapping at the first */
+		if(hp == hiob)
+			hp = hiob + nhiob;
+		hp--;
+		q = hp->link;
+		if(q) {
+			/* insert p just before the bucket's head q */
+			p->fore = q;
+			p->back = q->back;
+			q->back = p;
+			p->back->fore = p;
+		} else {
+			/* first buffer in this bucket: a ring of one */
+			hp->link = p;
+			p->fore = p;
+			p->back = p;
+		}
+		p->dev = devnone;
+		p->addr = -1;
+		p->xiobuf = ialloc(RBUFSIZE, RBUFSIZE);
+		p->iobuf = (char*)-1;
+		p++;
+	}
+
+	/*
+	 * Make sure that no more of bank[0] can be used:
+	 * 'check' will do an ialloc(0, 1) to find the base of
+	 * sparemem.
+	 */
+	if(mconf.bank[0].limit < 1024*1024)
+		mconf.bank[0].base = mconf.bank[0].limit+1;
+
+	v = 0;
+	for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++)
+		v += b->limit - b->base;
+	print(" mem left = %,llud, out of %,llud\n", v, conf.mem);
+	/* paranoia: add this command as late as is easy */
+	cmd_install("memory", "-- print ranges of memory banks", cmd_memory);
+}
+
+/*
+ * make p's data addressable: point p->iobuf at the memory
+ * allocated for it in iobufinit and return that address.
+ */
+void*
+iobufmap(Iobuf *p)
+{
+	return p->iobuf = p->xiobuf;
+}
+
+/*
+ * undo iobufmap: p->iobuf is set to the invalid pointer -1.
+ */
+void
+iobufunmap(Iobuf *p)
+{
+	p->iobuf = (char*)-1;
+}
--- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970
+++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013
@@ -0,0 +1,77 @@
+/*
+ * Memory and machine-specific definitions. Used in C and assembler.
+ */ +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#define GiB 1073741824u /* Gibi 000000000040000000 */ +#define TiB 1099511627776ull /* Tebi 0x0000010000000000 */ +#define PiB 1125899906842624ull /* Pebi 0x0004000000000000 */ +#define EiB 1152921504606846976ull /* Exbi 0x1000000000000000 */ + +#define ALIGNED(p, a) (!(((uintptr)(p)) & ((a)-1))) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BY2WD 4 +#define BY2V 8 /* bytes per double word */ +#define BY2SE 8 /* bytes per stack element */ +#define BLOCKALIGN 8 + +/* + * 4K pages + */ +#define PGSZ (4*KiB) /* page size */ +#define PGSHIFT 12 /* log(PGSZ) */ +#define PTSZ (4*KiB) /* page table page size */ +#define PTSHIFT 9 /* */ + +#define MACHSZ (4*KiB) /* Mach+stack size */ +#define MACHMAX 32 /* max. number of cpus */ +#define MACHSTKSZ (6*(4*KiB)) /* Mach stack size */ + +#define KSTACK (16*1024) /* Size of Proc kernel stack */ +#define STACKALIGN(sp) ((sp) & ~(BY2SE-1)) /* bug: assure with alloc */ + +/* + * 2M pages + */ +#define BIGPGSHIFT 21 +#define BIGPGSZ (1ull<>(((l)*PTSHIFT)+PGSHIFT)) & ((1< $target + +sipi.$O: sipi.h --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,447 @@ +#include "all.h" + +#include "amd64.h" + +static int dbgflg = 0; +#define DBG(...) 
do{if(dbgflg)print(__VA_ARGS__);}while(0) + +static void* +malloc(usize bytes) +{ + void *va; + + va = ialloc(bytes, 0); + if(va == nil) + panic("malloc: %#p\n", getcallerpc(&bytes)); + return va; +} +void* +mallocalign(usize bytes, uint align, uint, uint) +{ + void *va; + + va = ialloc(bytes, align); + if(va == nil) + panic("mallocalign: %#p\n", getcallerpc(&bytes)); + return va; +} + +typedef struct Page Page; +struct Page +{ +// Lock; + uintmem pa; /* Physical address in memory */ + uintptr va; /* Virtual address for user */ +// uint daddr; /* Disc address on swap */ +// int ref; /* Reference count */ +// uchar modref; /* Simulated modify/reference bits */ +// int color; /* Cache coloring */ +// char cachectl[MACHMAX]; /* Cache flushing control for mmuput */ +// Image *image; /* Associated text or swap image */ +// Page *next; /* Lru free list */ +// Page *prev; +// Page *hash; /* Image hash chains */ +// int pgszi; /* size index in m->pgsz[] */ +}; + +/* + * To do: + * PteNX; + * mmukmapsync grot for >1 processor; + * mmuptcopy (PteSHARED trick?); + */ + +#define PPN(x) ((x)&~(PGSZ-1)) + +/* + * set up a pat mappings. the system depends + * on the first 4 mappings not changing. 
+ */
+enum{
+	Patmsr	= 0x277,	/* msr holding the eight pat entries */
+};
+
+/*
+ * memory type held in each of the eight pat entries;
+ * refreshed from the msr by setpatreg.
+ */
+static uchar pattab[8] = {
+	PATWB,
+	PATWT,
+	PATUCMINUS,
+	PATUC,
+
+	PATWB,
+	PATWT,
+	PATUCMINUS,
+	PATUC,
+};
+
+/*
+ * 4K pte flag bits selecting pat entry i (see vmappat).
+ */
+static uint patflags[8] = {
+	0,
+	PtePWT,
+	PtePCD,
+	PtePCD | PtePWT,
+	Pte4KPAT,
+	Pte4KPAT | PtePWT,
+	Pte4KPAT | PtePCD,
+	Pte4KPAT | PtePCD | PtePWT,
+};
+
+/*
+ * set pat entry rno (one byte of the msr) to memory type
+ * `type', with interrupts off for the read-modify-write,
+ * then reload pattab from the new msr value.
+ */
+static void
+setpatreg(int rno, int type)
+{
+	int i;
+	Mpl s;
+	u64int pat;
+
+	s = splhi();
+	pat = rdmsr(Patmsr);
+	pat &= ~(0xffull<<rno*8);
+	pat |= (u64int)type<<rno*8;
+	wrmsr(Patmsr, pat);
+	splx(s);
+
+	if(m->machno == 0)
+		print("pat: %.16llux\n", pat);
+	for(i = 0; i < 64; i += 8)
+		pattab[i>>3] = pat>>i;
+}
+
+/*
+ * only the last entry is changed (to PATWC); see the note
+ * above about the first four.
+ */
+static void
+patinit(void)
+{
+	setpatreg(7, PATWC);
+}
+
+/* adjust memory flags based on page table level (bits shift around) */
+static uint
+memflagssz(uint flag, int ps)
+{
+	if(flag & Pte4KPAT && ps > 4*1024){
+		/* large pages keep the pat bit elsewhere */
+		flag &= ~Pte4KPAT;
+		flag |= Pte2MPAT | PtePS;
+	}
+	else if(ps > 4*1024)
+		flag |= PtePS;
+	return flag;
+}
+
+/*
+ * debugging: print the entry mmuwalk reaches for addr when
+ * asked for each of levels 3 down to 0, using this cpu's pml4.
+ */
+void
+dumpmmuwalk(uintmem addr)
+{
+	int l;
+	PTE *pte, *pml4;
+
+	pml4 = UINT2PTR(m->pml4->va);
+	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte);
+	if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0)
+		print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte);
+}
+
+static Lock mmukmaplock;
+static Lock vmaplock;
+
+/* index of v in the table at each level, top (3) to bottom (0) */
+#define	PML4X(v)	PTLX((v), 3)
+#define	PDPX(v)		PTLX((v), 2)
+#define	PDX(v)		PTLX((v), 1)
+#define	PTX(v)		PTLX((v), 0)
+
+/*
+ * placeholder: does nothing and returns 0.
+ */
+int
+mmukmapsync(uvlong va)
+{
+	USED(va);
+
+	return 0;
+}
+
+/* allocate page directories &c for vmaps */
+static uintmem
+walkalloc(usize size)
+{
+	void *va;
+
+	if((va = mallocalign(size, PTSZ, 0, 0)) != nil)
+		return PADDR(va);
+	panic("walkalloc: fail");
+	return 0;
+}
+
+uintptr
+kseg2map(uintmem pa, uintmem len, uint basef)
+{
+	int i, l;
+ uintptr va; + uintmem mem, nextmem; + PTE *pte, *pml4; + + DBG("kseg2map: %#P %#P size %P\n", pa, pa+len, len); + pml4 = UINT2PTR(m->pml4->va); + va = KSEG2+pa; + for(mem = pa; mem < pa+len; mem = nextmem){ + nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0]; + for(i = m->npgsz - 1; i >= 0; i--){ + if((mem & m->pgszmask[i]) != 0) + continue; + if(mem + PGLSZ(i) > pa+len) + continue; + if((l = mmuwalk(pml4, va, i, &pte, walkalloc)) != i){ + /* + * since we don't track vmap()s that overlap after + * rounding up to full pages, we have to be sloppy here + * and just say "close enough". should we check that the + * memory flags are the same? + */ + DBG("mmu: kseg2map: %#P: %d %d\n", mem, i, l); + /* i = l; ? */ + }else + *pte = mem|memflagssz(basef, PGLSZ(i)); + nextmem = mem + PGLSZ(i); + va += PGLSZ(i); + break; + } + } + return KSEG2+pa; +} + +void* +vmapflags(uintmem pa, usize size, uint flags) +{ + uintptr va; + usize o, sz; + + DBG("%d: vmapflags(%#P, %lud, %ux)\n", m->machno, pa, size, flags); + + /* Might be asking for less than a page. */ + o = pa & ((1<machno, pa, sz, flags, va, o); + return UINT2PTR(va + o); +} + + +void +vunmap(void* v, usize size) +{ + uintptr va; + + DBG("vunmap(%#p, %lud)\n", v, size); + + if(m->machno != 0) + panic("vunmap"); + + /* See the comments above in vmap. 
*/
+	va = PTR2UINT(v);
+	if(va >= KZERO && va+size < KZERO+1ull*MiB)
+		return;
+
+	/* missing implementation */
+	DBG("vunmap(%#p, %lud)\n", v, size);
+}
+
+/*
+ * map size bytes of physical memory at pa with flags
+ * PtePCD|PteRW; returns whatever vmapflags returns.
+ */
+void*
+vmap(uintmem pa, usize size)
+{
+	DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa));
+	return vmapflags(pa, size, PtePCD|PteRW);
+}
+
+/*
+ * like vmap, but with the pte flags that select the pat entry
+ * currently holding memory type pattype.  if no entry of
+ * pattab holds that type, falls back to plain vmap.
+ */
+void*
+vmappat(uintmem pa, usize size, uint pattype)
+{
+	int i;
+
+	DBG("vmappat(%#p, %lud, %#ux) pc=%#p\n", pa, size, pattype, getcallerpc(&pa));
+	for(i = 0; i < nelem(pattab); i++)
+		if(pattab[i] == pattype)
+			return vmapflags(pa, size, patflags[i]|PteRW);
+	return vmap(pa, size);
+}
+
+/*
+ * walk the page tables rooted at pml4 towards the entry
+ * for va at the given level (3 is the pml4 itself, 0 the
+ * lowest).  the walk runs at splhi.
+ * a missing intermediate table is allocated with alloc and
+ * zeroed; if alloc is nil the walk stops at the missing entry.
+ * an entry with PtePS set also stops the walk.
+ * *ret is set to the entry reached and the level reached is
+ * returned, so a caller must compare it with the level asked
+ * for.  returns -1, leaving *ret alone, if alloc returns ~0.
+ */
+int
+mmuwalk(PTE* pml4, uintptr va, int level, PTE** ret, uintmem (*alloc)(usize))
+{
+	int l;
+	uintmem pa;
+	PTE *pte;
+	Mpl pl;
+
+	pl = splhi();
+	DBG("mmuwalk%d: va %#p level %d\n", m->machno, va, level);
+	pte = &pml4[PTLX(va, 3)];
+	for(l = 3; l >= 0; l--){
+		if(l == level)
+			break;
+		if(!(*pte & PteP)){
+			if(alloc == nil)
+				break;
+			pa = alloc(PTSZ);
+			if(pa == ~0){
+				/* don't return with the priority level still raised */
+				splx(pl);
+				return -1;
+			}
+			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
+			*pte = pa|PteRW|PteP;
+		}
+		else if(*pte & PtePS)
+			break;
+		pte = UINT2PTR(KADDR(PPN(*pte)));
+		pte += PTLX(va, l-1);
+	}
+	*ret = pte;
+	splx(pl);
+
+	return l;
+}
+
+/*
+ * returns the physical address va maps to in this cpu's
+ * tables, or ~0 if the walk fails.
+ * NOTE(review): the entry reached is used without testing
+ * PteP, so an unmapped va appears to yield a meaningless
+ * address rather than ~0 - confirm that callers only use
+ * this for diagnostics.
+ */
+uintmem
+mmuphysaddr(uintptr va)
+{
+	int l;
+	PTE *pte;
+	uintmem mask, pa;
+
+	/*
+	 * Given a VA, find the PA.
+	 * This is probably not the right interface,
+	 * but will do as an experiment. Usual
+	 * question, should va be void* or uintptr?
+	 */
+	l = mmuwalk(UINT2PTR(m->pml4->va), va, 0, &pte, nil);
+	DBG("physaddr: va %#p l %d\n", va, l);
+	if(l < 0)
+		return ~0;
+
+	/* the offset within the page depends on the level reached */
+	mask = PGLSZ(l)-1;
+	pa = (*pte & ~mask) + (va & mask);
+
+	DBG("physaddr: l %d va %#p pa %#P\n", l, va, pa);
+
+	return pa;
+}
+
+Page mach0pml4;		/* describes cpu0's pml4; filled in by mmuinit */
+
+/*
+ * set Nxe in the Efer msr if cpuid says the processor allows it.
+ */
+static void
+nxeon(void)
+{
+	Cpuidreg r;
+
+	/* on intel64, cpuid 0x8::1 DX bit 20 means "Nxe bit in Efer allowed" */
+	r = (Cpuidreg){0x80000001, 0, 0, 0};
+	cpuid(&r);
+	if(r.dx & (1<<20))	/* DX, as the comment says; CX was tested before */
+		wrmsr(Efer, rdmsr(Efer) | Nxe);
+}
+
+/*
+ * enter every memory bank but the first into the tables rooted
+ * at pml4, at virtual address KSEG2+pa, using for each stretch
+ * the largest page size that is aligned and fits.
+ */
+static void
+mapmem(PTE* pml4)
+{
+	int j, i, l;
+	uintptr va;
+	uintmem lo, hi, mem, nextmem;
+	Mbank *b;
+	PTE *pte;
+
+	/* everything else mapped at kseg2 s.t. pa = va - KSEG2 */
+	for(j = 1; j < mconf.nbank; j++){
+		b = mconf.bank + j;
+		va = KSEG2+b->base;
+
+		lo = b->base;
+		hi = b->limit;
+		print("mapmem: mem %#P %#P size %P\n", lo, hi, b->limit-b->base);
+		/* Convert a range into pages */
+		for(mem = lo; mem < hi; mem = nextmem){
+			nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0];
+			/* Try large pages first */
+			for(i = m->npgsz - 1; i >= 0; i--){
+				if((mem & m->pgszmask[i]) != 0)
+					continue;
+				if(mem + PGLSZ(i) > hi)
+					continue;
+				if((l = mmuwalk(pml4, va, i, &pte, walkalloc)) < 0)
+					panic("mapmem: mmuwalk");
+				*pte = mem|PteRW|PteP|PteG;
+				if(l > 0)
+					*pte |= PtePS;
+				nextmem = mem + PGLSZ(i);
+				va += PGLSZ(i);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * mmu set-up for an application processor: take a private
+ * copy of cpu0's pml4 in the page that follows this cpu's
+ * stack, describe it in m->pml4 and switch to it.
+ */
+void
+apmmuinit(void)
+{
+	uchar *p;
+
+	archmmu();
+	/*
+	 * NIX: KLUDGE: Has to go when each mach is using
+	 * its own page table
+	 */
+	p = UINT2PTR(m->stack);
+	p += MACHSTKSZ;
+
+	memmove(p, UINT2PTR(mach0pml4.va), PTSZ);
+	assert(sizeof(Page) <= sizeof(m->pml4kludge));
+	m->pml4 = (Page*)m->pml4kludge;
+	m->pml4->va = PTR2UINT(p);
+	m->pml4->pa = PADDR(p);
+//	m->pml4->daddr = mach0pml4.daddr;	/* # of user mappings in pml4 */
+
+	nxeon();
+	patinit();
+	putcr3(m->pml4->pa);
+}
+
+void
+mmuinit(void)
+{
+	Page *page;
+
+	assert(m->machno == 0);
+	archmmu();
+	DBG("mach%d: %#p pml4 %#p npgsz %d\n", m->machno, m,
m->pml4, m->npgsz); + + page = &mach0pml4; + page->pa = getcr3(); + page->va = PTR2UINT(KADDR(page->pa)); + + m->pml4 = page; + + nxeon(); + patinit(); + + if(dbgflg) + dumpmmuwalk(KZERO); + mmuphysaddr(PTR2UINT(end)); + mapmem(UINT2PTR(m->pml4->va)); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,479 @@ +#include "all.h" +#include "io.h" +#include "apic.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +#define l16get(p) (((p)[1]<<8)|(p)[0]) +#define l32get(p) (((u32int)l16get(p+2)<<16)|l16get(p)) +#define l64get(p) (((u64int)l32get(p+4)<<32)|l32get(p)) + +/* + * MultiProcessor Specification Version 1.[14]. + */ +typedef struct { /* MP Floating Pointer */ + u8int signature[4]; /* "_MP_" */ + u8int addr[4]; /* PCMP */ + u8int length; /* 1 */ + u8int revision; /* [14] */ + u8int checksum; + u8int feature[5]; +} _MP_; + +typedef struct { /* MP Configuration Table */ + u8int signature[4]; /* "PCMP" */ + u8int length[2]; + u8int revision; /* [14] */ + u8int checksum; + u8int string[20]; /* OEM + Product ID */ + u8int oaddr[4]; /* OEM table pointer */ + u8int olength[2]; /* OEM table length */ + u8int entry[2]; /* entry count */ + u8int apicpa[4]; /* local APIC address */ + u8int xlength[2]; /* extended table length */ + u8int xchecksum; /* extended table checksum */ + u8int reserved; + + u8int entries[]; +} PCMP; + +typedef struct { + char type[6]; + int polarity; /* default for this bus */ + int trigger; /* default for this bus */ +} Mpbus; + +static Mpbus mpbusdef[] = { + { "PCI ", IPlow, TMlevel, }, + { "ISA ", IPhigh, TMedge, }, +}; +static Mpbus* mpbus[Nbus]; + +static void +mpintrprint(char* s, u8int* p) +{ + char buf[128], *b, *e; + char format[] = " type %d flags %#ux bus %d IRQ %d APIC %d INTIN %d\n"; + + b = buf; + e = b + sizeof(buf); + b = seprint(b, e, "mpparse: intr:"); + if(s != nil) + b = seprint(b, e, " %s:", s); + seprint(b, e, format, p[1], l16get(p+2), 
p[4], p[5], p[6], p[7]); + print(buf); +} + +static u32int +mpmkintr(u8int* p) +{ + u32int v; + Apic *apic; + int n, polarity, trigger; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. + */ + if(mpbus[p[4]] == nil){ + mpintrprint("no source bus", p); + return 0; + } + if(p[6] != 0xff){ + if(Napic < 256 && p[6] >= Napic){ + mpintrprint("APIC ID out of range", p); + return 0; + } + switch(p[0]){ + default: + mpintrprint("INTIN botch", p); + return 0; + case 3: /* IOINTR */ + if((apic = ioapiclookup(p[6])) == nil){ + mpintrprint("unuseable ioapic", p); + return 0; + } + if(p[7] >= apic->nrdt){ + mpintrprint("IO INTIN out of range", p); + return 0; + } + break; + case 4: /* LINTR */ + if((apic = lapiclookup(p[6])) == nil){ + mpintrprint("unuseable lapic", p); + return 0; + } + if(p[7] >= nelem(apic->lvt)){ + mpintrprint("LOCAL INTIN out of range", p); + return 0; + } + USED(apic); + break; + } + } + n = l16get(p+2); + if((polarity = (n & 0x03)) == 2 || (trigger = ((n>>2) & 0x03)) == 2){ + mpintrprint("invalid polarity/trigger", p); + return 0; + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(p[1]){ + default: + mpintrprint("invalid type", p); + return 0; + case 0: /* INT */ + switch(polarity){ + case 0: + v |= mpbus[p[4]]->polarity; + break; + case 1: + v |= IPhigh; + break; + case 3: + v |= IPlow; + break; + } + switch(trigger){ + case 0: + v |= mpbus[p[4]]->trigger; + break; + case 1: + v |= TMedge; + break; + case 3: + v |= TMlevel; + break; + } + break; + case 1: /* NMI */ + v |= TMedge|IPhigh|MTnmi; + break; + case 2: /* SMI */ + v |= TMedge|IPhigh|MTsmi; + break; + case 3: /* ExtINT */ + v |= TMedge|IPhigh|MTei; + break; + } + + return v; +} + +static int +mpparse(PCMP* pcmp, int maxmach) +{ + u8int *e, *p; + int nmach, bustype, i, n; + u32int lo; + Apic *a; + + nmach = 0; + p = pcmp->entries; + e = ((uchar*)pcmp)+l16get(pcmp->length); + while(p < e) switch(*p){ + default: + print("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e-p); + for(i = 0; p < e; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 0: /* processor */ + /* + * Initialise the APIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, the memory mapped address comes + * from the PCMP structure as the addess is local to the + * CPU and identical for all. Indicate whether this is + * the bootstrap processor (p[3] & 0x02). 
+ */ + DBG("mpparse: cpu %d pa %#ux bp %d\n", + p[1], l32get(pcmp->apicpa), p[3] & 0x02); + if((p[3] & 0x01) != 0 && nmach < maxmach){ + nmach++; + lapicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02); + } + p += 20; + break; + case 1: /* bus */ + DBG("mpparse: bus: %d type %6.6s\n", p[1], (char*)p+2); + if(mpbus[p[1]] != nil){ + print("mpparse: bus %d already allocated\n", p[1]); + p += 8; + break; + } + for(i = 0; i < nelem(mpbusdef); i++){ + if(memcmp(p+2, mpbusdef[i].type, 6) != 0) + continue; + mpbus[p[1]] = &mpbusdef[i]; + break; + } + if(mpbus[p[1]] == nil) + print("mpparse: bus %d type %6.6s unknown\n", + p[1], (char*)p+2); + + p += 8; + break; + case 2: /* IOAPIC */ + /* + * Initialise the IOAPIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, p[4-7] is the memory mapped address. + */ + if(p[3] & 0x01) + ioapicinit(p[1], -1, l32get(p+4)); + + p += 8; + break; + case 3: /* IOINTR */ + /* + * p[1] is the interrupt type; + * p[2-3] contains the polarity and trigger mode; + * p[4] is the source bus; + * p[5] is the IRQ on the source bus; + * p[6] is the destination APIC; + * p[7] is the INITIN pin on the destination APIC. + */ + if(p[6] == 0xff){ + mpintrprint("routed to all IOAPICs", p); + p += 8; + break; + } + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + bustype = -1; + if(memcmp(mpbus[p[4]]->type, "PCI ", 6) == 0) + bustype = BusPCI; + else if(memcmp(mpbus[p[4]]->type, "ISA ", 6) == 0) + bustype = BusISA; + if(bustype != -1) + ioapicintrinit(bustype, p[4], p[6], p[7], p[5], lo); + + p += 8; + break; + case 4: /* LINTR */ + /* + * Format is the same as IOINTR above. + */ + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * Everything was checked in mpmkintr above. 
+ */ + if(p[6] == 0xff){ + for(i = 0; i < Napic; i++){ + if((a = lapiclookup(i)) == nil || a->addr != nil) + continue; + a->lvt[p[7]] = lo; + } + } + else{ +// xlapic[p[6]].lvt[p[7]] = lo; + if((a = lapiclookup(p[6])) != nil) + a->lvt[p[7]] = lo; + } + p += 8; + break; + } + + /* + * There's nothing of interest in the extended table, + * but check it for consistency. + */ + p = e; + e = p + l16get(pcmp->xlength); + while(p < e) switch(*p){ + default: + n = p[1]; + print("mpparse: unknown extended entry %d length %d\n", *p, n); + for(i = 0; i < n; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 128: + DBG("address space mapping\n"); + DBG(" bus %d type %d base %#llux length %#llux\n", + p[2], p[3], l64get(p+4), l64get(p+12)); + p += p[1]; + break; + case 129: + DBG("bus hierarchy descriptor\n"); + DBG(" bus %d sd %d parent bus %d\n", + p[2], p[3], p[4]); + p += p[1]; + break; + case 130: + DBG("compatibility bus address space modifier\n"); + DBG(" bus %d pr %d range list %d\n", + p[2], p[3], l32get(p+4)); + p += p[1]; + break; + } + return nmach; +} + +static int +sigchecksum(void* address, int length) +{ + u8int *p, sum; + + sum = 0; + for(p = address; length-- > 0; p++) + sum += *p; + + return sum; +} + +static void* +sigscan(u8int* address, int length, char* signature) +{ + u8int *e, *p; + int siglength; + + e = address+length; + siglength = strlen(signature); + for(p = address; p+siglength < e; p += 16){ + if(memcmp(p, signature, siglength)) + continue; + return p; + } + + return nil; +} + +static uintptr mptab[] = {0, 1024, 639*1024, 1024, 0xf0000, 0x10000, 0, 1024}; + +static void* +sigsearch(char* signature) +{ + int i; + uintmem p; + u8int *bda; + void *r; + + /* + * Search for the data structure: + * 1) in the first KB of the EBDA; + * 2) in the last KB of system base memory; + * 3) in the BIOS ROM between 0xe0000 and 0xfffff. 
+ */ + bda = BIOSSEG(0x40); + if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){ + if((p = (bda[0x0f]<<8)|bda[0x0e])){ + if((r = sigscan(BIOSSEG(p), 1024, signature)) != nil) + return r; + } + } + + if((p = (bda[0x0F]<<8|bda[0x0E])<<4) || + (p = (bda[0x14]<<8|bda[0x13])*1024-1024)) + mptab[nelem(mptab)-2] = (uintptr)p; + for(i = 0; i < nelem(mptab); i += 2) + if(r = sigscan(KADDR(mptab[i]), mptab[i+1], signature)) + return r; + return nil; +} + +void +mpsinit(int maxmach) +{ + u8int *p; + int i, n; + _MP_ *mp; + PCMP *pcmp; + +#ifdef acpiworking + mpacpi(maxmach); +#endif + + if((mp = sigsearch("_MP_")) == nil){ + print("mp: no tables\n"); + return; + } + if(DBGFLG){ + DBG("_MP_ @ %#p, addr %#ux length %ud rev %d", + mp, l32get(mp->addr), mp->length, mp->revision); + for(i = 0; i < sizeof(mp->feature); i++) + DBG(" %2.2#ux", mp->feature[i]); + DBG("\n"); + } + if(mp->revision != 1 && mp->revision != 4) + return; + if(sigchecksum(mp, mp->length*16) != 0) + return; + + if((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == nil) + return; + if(pcmp->revision != 1 && pcmp->revision != 4){ + vunmap(pcmp, sizeof(PCMP)); + return; + } + n = l16get(pcmp->length) + l16get(pcmp->xlength); + vunmap(pcmp, sizeof(PCMP)); + if((pcmp = vmap(l32get(mp->addr), n)) == nil) + return; + if(sigchecksum(pcmp, l16get(pcmp->length)) != 0){ + vunmap(pcmp, n); + return; + } + if(DBGFLG){ + DBG("PCMP @ %#p length %#ux revision %d\n", + pcmp, l16get(pcmp->length), pcmp->revision); + DBG(" %20.20s oaddr %#ux olength %#ux\n", + (char*)pcmp->string, l32get(pcmp->oaddr), + l16get(pcmp->olength)); + DBG(" entry %d apicpa %#ux\n", + l16get(pcmp->entry), l32get(pcmp->apicpa)); + + DBG(" xlength %#ux xchecksum %#ux\n", + l16get(pcmp->xlength), pcmp->xchecksum); + } + if(pcmp->xchecksum != 0){ + p = ((u8int*)pcmp) + l16get(pcmp->length); + i = sigchecksum(p, l16get(pcmp->xlength)); + if(((i+pcmp->xchecksum) & 0xff) != 0){ + print("mp: extended table checksums to %#ux\n", i); + vunmap(pcmp, n); + return; + } + } 
+ + /* + * Parse the PCMP table and set up the datastructures + * for later interrupt enabling and application processor + * startup. + */ + mpparse(pcmp, maxmach); + lapicdump(); + ioapicdump(); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,113 @@ +#include "all.h" +#include "io.h" +#include "apic.h" + +enum { + Dpcicap = 1<<0, + Dmsicap = 1<<1, + Dvec = 1<<2, + Debug = 0, +}; + +enum { + /* address */ + Msiabase = 0xfee00000u, + Msiadest = 1<<12, /* same as 63:56 of apic vector */ + Msiaedest = 1<<4, /* same as 55:48 of apic vector */ + Msialowpri = 1<<3, /* redirection hint */ + Msialogical = 1<<2, + + /* data */ + Msidlevel = 1<<15, + Msidassert = 1<<14, + Msidlogical = 1<<11, + Msidmode = 1<<8, /* 3 bits; delivery mode */ + Msidvector = 0xff<<0, +}; + +enum{ + /* msi capabilities */ + Vmask = 1<<8, + Cap64 = 1<<7, + Mmesgmsk = 7<<4, + Mmcap = 7<<1, + Msienable = 1<<0, +}; + +static int +msicap(Pcidev *p) +{ + int c; + + c = pcicap(p, PciCapMSI); + if(c == -1) + return 0; + return c; +} + +static int +blacklist(Pcidev *p) +{ + switch(p->vid<<16 | p->did){ + case 0x11ab<<16 | 0x6485: + return -1; + } + return 0; +} + +int +pcimsienable(Pcidev *p, uvlong vec) +{ + char *s; + uint c, f, d, datao, lopri, dmode, logical; + + c = msicap(p); + if(c == 0) + return -1; + + f = pcicfgr16(p, c + 2) & ~Mmesgmsk; + + if(blacklist(p) != 0) + return -1; + datao = 8; + d = vec>>48; + lopri = (vec & 0x700) == MTlp; + logical = (vec & Lm) != 0; + pcicfgw32(p, c + 4, Msiabase | Msiaedest * d + | Msialowpri * lopri | Msialogical * logical); + if(f & Cap64){ + datao += 4; + pcicfgw32(p, c + 8, 0); + } + dmode = (vec >> 8) & 7; + pcicfgw16(p, c + datao, Msidassert | Msidlogical * logical + | Msidmode * dmode | (uint)vec & 0xff); + if(f & Vmask) + pcicfgw32(p, c + datao + 4, 0); + + /* leave vectors configured but disabled for debugging */ + if((s = getconf("*nomsi")) != nil && strtoul(s, nil, 0) != 0) + return -1; + + 
pcicfgw16(p, c + 2, f); + return 0; +} + +int +pcimsimask(Pcidev *p, int mask) +{ + uint c, f; + + c = msicap(p); + if(c == 0) + return -1; + f = pcicfgr16(p, c + 2) & ~Msienable; + if(mask){ + pcicfgw16(p, c + 2, f & ~Msienable); +// pciclrbme(p); cheeze + }else{ + pcisetbme(p); + pcicfgw16(p, c + 2, f | Msienable); + } + return 0; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,55 @@ +#include "all.h" +#include "io.h" +#include "ureg.h" + +#include "dosfs.h" + +static Dosfile file; +static int opened; +char nvrfile[128] = "plan9.nvr"; + +static void +nvopen(void) +{ + Mpl s; + Dosfile *fp; + + if(opened) + return; + opened = 1; + s = spllo(); + fp = dosopen(&dos, nvrfile, &file); + splx(s); + if(fp == 0) + panic("can't open %s\n", nvrfile); +} + +int +nvread(int offset, void *a, int n) +{ + int r; + Mpl s; + + nvopen(); + + s = spllo(); + file.offset = offset; + r = dosread(&file, a, n); + splx(s); + return r; +} + +int +nvwrite(int offset, void *a, int n) +{ + int r; + Mpl s; + + nvopen(); + + s = spllo(); + file.offset = offset; + r = doswrite(&file, a, n); + splx(s); + return r; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:02 2013 @@ -0,0 +1,179 @@ +#include "u.h" +#include "../port/lib.h" +#include "dat.h" +#include "fns.h" +#include "adr.h" +#include "io.h" + +/* + * Where configuration info is left for the loaded programme. + * This will turn into a structure as more is done by the boot loader + * (e.g. why parse the .ini file twice?). + * There are 3584 bytes available at CONFADDR. 
+ */ +#define CONFADDR PTR2UINT(KADDR(0x0001200)) + +#define BOOTLINE ((char*)CONFADDR) +#define BOOTLINELEN 64 +#define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (4096-0x200-BOOTLINELEN) + +enum { + Maxconf = 64, +}; + +typedef struct C C; +struct C { + char *name; + char *val; +}; + +static C cfg[Maxconf]; +static int ncfg; +static char dbgflg[127]; + +static void +parseoptions(void) +{ + long i, n; + char *cp, *line[Maxconf]; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + n = getfields(cp, line, Maxconf, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + cfg[ncfg].name = line[i]; + cfg[ncfg].val = cp; + ncfg++; + } +} + +static void +cmdline(void) +{ + char *p, *f[32], **argv, buf[200]; + int argc, n, o; + + p = getconf("*cmdline"); + if(p == nil) + return; + snprint(buf, sizeof buf, "%s", p); + argc = tokenize(buf, f, nelem(f)); + argv = f; + + /* + * Process flags. + * Flags [A-Za-z] may be optionally followed by + * an integer level between 1 and 127 inclusive + * (no space between flag and level). + * '--' ends flag processing. 
+ */ + while(--argc > 0 && (*++argv)[0] == '-' && (*argv)[1] != '-'){ + while(o = *++argv[0]){ + if(!(o >= 'A' && o <= 'Z') && !(o >= 'a' && o <= 'z')) + continue; + n = strtol(argv[0]+1, &p, 0); + if(p == argv[0]+1 || n < 1 || n > 127) + n = 1; + argv[0] = p-1; + dbgflg[o] = n; + } + } +} + +static int typemap[] = { + Anone, + Amemory, + Areserved, + Aacpireclaim, + Aacpinvs, + Aunusable, + Adisable, +}; + +static void +e820(void) +{ + char *p, *s; + uvlong base, len, type; + + p = getconf("*e820"); + if(p == nil) + return; + for(s = p;;){ + if(*s == 0) + break; + type = strtoull(s, &s, 16); + if(*s != ' ') + break; + base = strtoull(s, &s, 16); + if(*s != ' ') + break; + len = strtoull(s, &s, 16) - base; + if(*s != ' ' && *s != 0 || len == 0) + break; + if(type >= nelem(typemap)) + continue; + adrmapinit(base, len, typemap[type], Mfree); + } +} + +void +options(void) +{ + parseoptions(); + e820(); + cmdline(); +} + + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < ncfg; i++) + if(cistrcmp(cfg[i].name, name) == 0) + return cfg[i].val; + return nil; +} + +int +pciconfig(char *class, int ctlrno, Pciconf *pci) +{ + char cc[32], *p; + int i; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + p = getconf(cc); + if(p == nil) + return 0; + + pci->type = ""; + snprint(pci->optbuf, sizeof pci->optbuf, "%s", p); + pci->nopt = tokenize(pci->optbuf, pci->opt, nelem(pci->opt)); + for(i = 0; i < pci->nopt; i++){ + p = pci->opt[i]; + if(cistrncmp(p, "type=", 5) == 0) + pci->type = p + 5; + else if(cistrncmp(p, "port=", 5) == 0) + pci->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + pci->irq = strtoul(p+4, &p, 0); +// else if(cistrncmp(p, "mem=", 4) == 0) +// pci->mem = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "tbdf=", 5) == 0) + pci->tbdf = strtotbdf(p+5, &p, 0); + } + return 1; +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,772 @@ +/* pci */ +#include "all.h" +#include "io.h" + 
+enum { /* configuration mechanism #1 */ + PciADDR = 0xcf8, + PciDATA = 0xcfc, + + Maxfn = 7, + Maxdev = 31, + Maxbus = 255, + + /* command register */ + IOen = 1<<0, + MEMen = 1<<1, + MASen = 1<<2, + MemWrInv = 1<<4, + PErrEn = 1<<6, + SErrEn = 1<<8, + + Write, + Read, +}; + +static Lock pcicfglock; +static Lock pcicfginitlock; +static int pcicfgmode = -1; +static Pcidev* pciroot; +static Pcidev* pcilist; +static Pcidev* pcitail; + +static int pcicfgrw(int, int, int, int, int); + +u32int +pcibarsize(Pcidev *p, int rno) +{ + u32int v, size; + + v = pcicfgr32(p, rno); + pcicfgw32(p, rno, 0xFFFFFFF0); + size = pcicfgr32(p, rno); + if(v & 1) + size |= 0xFFFF0000; + pcicfgw32(p, rno, v); + + return -(size & ~0x0F); +} + +static void +cmd_pcihinv(int argc, char *argv[]) +{ + int i, flags = 0; + + for (i = 1; i < argc; i++) + if (strcmp(argv[i], "-v") == 0) + flags |= 1; + else { + print("unknown pcihinv option %s; options are: -v\n", argv[i]); + return; + } + pcihinv(nil, flags); /* print the whole device tree */ +} + +static u32int +pcimask(u32int v) +{ + u32int m; + + m = 8*sizeof(v); + for(m = 1<<(m-1); m != 0; m >>= 1) { + if(m & v) + break; + } + + m--; + if((v & m) == 0) + return v; + + v |= m; + return v+1; +} + +static int +pcilscan(int bno, Pcidev** list) +{ + Pcidev *p, *head, *tail; + int dno, fno, i, hdt, l, maxfno, maxubn, sbn, tbdf, ubn; + + maxubn = bno; + head = nil; + tail = nil; + for(dno = 0; dno <= Maxdev; dno++){ + maxfno = 0; + for(fno = 0; fno <= maxfno; fno++){ + /* + * For this possible device, form the + * bus+device+function triplet needed to address it + * and try to read the vendor and device ID. + * If successful, allocate a device struct and + * start to fill it in with some useful information + * from the device's configuration space. 
+ */ + tbdf = MKBUS(BusPCI, bno, dno, fno); + l = pcicfgrw(tbdf, PciVID, 0, Read, 4); + if(l == 0xFFFFFFFF || l == 0) + continue; + p = ialloc(sizeof *p, 0); + p->tbdf = tbdf; + p->vid = l; + p->did = l>>16; + + if(pcilist != nil) + pcitail->list = p; + else + pcilist = p; + pcitail = p; + + p->pcr = pcicfgr16(p, PciPCR); + p->rid = pcicfgr8(p, PciRID); + p->ccrp = pcicfgr8(p, PciCCRp); + p->ccru = pcicfgr8(p, PciCCRu); + p->ccrb = pcicfgr8(p, PciCCRb); + p->cls = pcicfgr8(p, PciCLS); + p->ltr = pcicfgr8(p, PciLTR); + + p->intl = pcicfgr8(p, PciINTL); + + /* + * If the device is a multi-function device adjust the + * loop count so all possible functions are checked. + */ + hdt = pcicfgr8(p, PciHDT); + if(hdt & 0x80) + maxfno = Maxfn; + + /* + * If appropriate, read the base address registers + * and work out the sizes. + */ + switch(p->ccrb) { + default: + if((hdt & 0x7F) != 0) + break; + for(i = 0; i < nelem(p->mem); i++) { + p->mem[i].bar = (u32int)pcicfgr32(p, PciBAR0+4*i); + p->mem[i].size = pcibarsize(p, PciBAR0+4*i); + } + break; + + case 0x00: + case 0x05: /* memory controller */ + case 0x06: /* bridge device */ + break; + } + + if(head != nil) + tail->link = p; + else + head = p; + tail = p; + } + } + + *list = head; + for(p = head; p != nil; p = p->link){ + /* + * Find PCI-PCI bridges and recursively descend the tree. + */ + if(p->ccrb != 0x06 || p->ccru != 0x04) + continue; + + /* + * If the secondary or subordinate bus number is not + * initialised try to do what the PCI BIOS should have + * done and fill in the numbers as the tree is descended. + * On the way down the subordinate bus number is set to + * the maximum as it's not known how many buses are behind + * this one; the final value is set on the way back up. 
+ */ + sbn = pcicfgr8(p, PciSBN); + ubn = pcicfgr8(p, PciUBN); + + if(sbn == 0 || ubn == 0) { + print("%τ: unconfigured bridge\n", p->tbdf); + + sbn = maxubn+1; + /* + * Make sure memory, I/O and master enables are + * off, set the primary, secondary and subordinate + * bus numbers and clear the secondary status before + * attempting to scan the secondary bus. + * + * Initialisation of the bridge should be done here. + */ + pcicfgw32(p, PciPCR, 0xFFFF0000); + pcicfgw32(p, PciPBN, Maxbus<<16 | sbn<<8 | bno); + pcicfgw16(p, PciSPSR, 0xFFFF); + maxubn = pcilscan(sbn, &p->bridge); + pcicfgw32(p, PciPBN, maxubn<<16 | sbn<<8 | bno); + } + else { + if(ubn > maxubn) + maxubn = ubn; + pcilscan(sbn, &p->bridge); + } + } + + return maxubn; +} + +static uchar +pIIxget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 0x60, 0x61, 0x62, 0x63 */ + pirq = pcicfgr8(router, link); + return (pirq < 16)? pirq: 0; +} + +static void +pIIxset(Pcidev *router, uchar link, uchar irq) +{ + pcicfgw8(router, link, irq); +} + +static uchar +viaget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 1, 2, 3, 5 */ + pirq = (link < 6)? pcicfgr8(router, 0x55 + (link>>1)): 0; + + return (link & 1)? (pirq >> 4): (pirq & 15); +} + +static void +viaset(Pcidev *router, uchar link, uchar irq) +{ + uchar pirq; + + pirq = pcicfgr8(router, 0x55 + (link >> 1)); + pirq &= (link & 1)? 0x0f: 0xf0; + pirq |= (link & 1)? 
(irq << 4): (irq & 15); + pcicfgw8(router, 0x55 + (link>>1), pirq); +} + +typedef struct Bridge Bridge; +struct Bridge +{ + ushort vid; + ushort did; + uchar (*get)(Pcidev *, uchar); + void (*set)(Pcidev *, uchar, uchar); +}; + +static Bridge southbridges[] = { + { 0x8086, 0xffff, pIIxget, pIIxset }, /* Intel * */ + + { 0x1002, 0xffff, nil, nil }, /* ati (amd) */ + { 0x1022, 0xffff, nil, nil }, /* amd */ + { 0x10de, 0x00d1, nil, nil }, /* NVIDIA nForce 3 */ + { 0x1106, 0x3227, viaget, viaset }, /* Viatech VT8237 */ + { 0x1166, 0x0200, nil, nil }, /* ServerWorks ServerSet III LE */ +}; + +typedef struct Slot Slot; +struct Slot { + uchar bus; /* Pci bus number */ + uchar dev; /* Pci device number */ + uchar maps[12]; /* Avoid structs! Link and mask. */ + uchar slot; /* Add-in/built-in slot */ + uchar reserved; +}; + +typedef struct Router Router; +struct Router { + uchar signature[4]; /* Routing table signature */ + uchar version[2]; /* Version number */ + uchar size[2]; /* Total table size */ + uchar bus; /* Interrupt router bus number */ + uchar devfn; /* Router's devfunc */ + uchar pciirqs[2]; /* Exclusive PCI irqs */ + uchar compat[4]; /* Compatible PCI interrupt router */ + uchar miniport[4]; /* Miniport data */ + uchar reserved[11]; + uchar checksum; +}; + +#pragma varargck type "τ" int + +static int +τfmt(Fmt* fmt) +{ + char buf[32], *p, *e; + int type, tbdf; + + p = buf; + e = buf+sizeof buf; + tbdf = va_arg(fmt->args, int); + if(tbdf == -1) + return fmtstrcpy(fmt, "unk"); + type = BUSTYPE(tbdf); + if(type == 12) + p = seprint(p, e, "pci"); + else + p = seprint(p, e, "%d", type); + seprint(p, e, ".%d.%d.%d", + BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + return fmtstrcpy(fmt, buf); +} + +static void +pcirouting(void) +{ + uchar *p, pin, irq, link, *map; + int size, i, fn, tbdf; + Bridge *southbridge; + Pcidev *sbpci, *pci; + Slot *e; + Router *r; + + /* Search for PCI interrupt routing table in BIOS */ + for(p = (uchar*)KADDR(0xf0000); p < 
(uchar*)KADDR(0xfffff); p += 16) + if(p[0] == '$' && p[1] == 'P' && p[2] == 'I' && p[3] == 'R') + break; + + if(p >= (uchar *)KADDR(0xfffff)) + return; + + r = (Router *)p; + + fmtinstall(L'τ', τfmt); + if(0) + print("PCI interrupt routing table version %d.%d at %.6llux\n", + r->version[0], r->version[1], (uintptr)r & 0xfffff); + + tbdf = (BusPCI << 24)|(r->bus << 16)|(r->devfn << 8); + sbpci = pcimatchtbdf(tbdf); + if(sbpci == nil) { + print("pcirouting: Cannot find south bridge %τ\n", tbdf); + return; + } + + for(i = 0; i != nelem(southbridges); i++) + if(sbpci->vid == southbridges[i].vid + && (sbpci->did == southbridges[i].did || southbridges[i].did == 0xffff)) + break; + + if(i == nelem(southbridges)) { + print("pcirouting: ignoring south bridge %τ %.4ux/%.4ux\n", tbdf, sbpci->vid, sbpci->did); + return; + } + southbridge = &southbridges[i]; + if(southbridge->get == nil || southbridge->set == nil) + return; + + size = (r->size[1] << 8)|r->size[0]; + for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) { + if(0){ + print("%.2ux/%.2ux %.2ux: ", e->bus, e->dev, e->slot); + for (i = 0; i != 4; i++) { + uchar *m = &e->maps[i * 3]; + print("[%d] %.2ux %.4ux ", + i, m[0], (m[2] << 8)|m[1]); + } + print("\n"); + } + + for(fn = 0; fn <= Maxfn; fn++) { + tbdf = MKBUS(BusPCI, e->bus, e->dev, fn); + pci = pcimatchtbdf(tbdf); + if(pci == nil) + continue; + pin = pcicfgr8(pci, PciINTP); + if(pin == 0 || pin == 0xff) + continue; + + map = &e->maps[(pin - 1) * 3]; + link = map[0]; + irq = southbridge->get(sbpci, link); + if(irq == 0 || irq == pci->intl) + continue; + if(pci->intl != 0 && pci->intl != 0xFF) { + print("pcirouting: %τ at pin %d link %d irq %d -> %d\n", + tbdf, pin, link, irq, pci->intl); + southbridge->set(sbpci, link, pci->intl); + continue; + } + print("pcirouting: %τ at pin %d link %d irq %d\n", tbdf, pin, link, irq); + pcicfgw8(pci, PciINTL, irq); + pci->intl = irq; + } + } +} + +static void +pcicfginit(void) +{ + int bno, n; + Pcidev **list; + + if(pcicfgmode 
!= -1)
+		return;
+	/* re-check under the init lock in case another caller completed the probe first */
+	lock(&pcicfginitlock);
+	if(pcicfgmode != -1){
+		unlock(&pcicfginitlock);
+		return;
+	}
+
+	cmd_install("pcihinv", "-- pci inventory", cmd_pcihinv);
+
+	/*
+	 * Install the %τ (tbdf) verb before the buses are scanned:
+	 * pcilscan prints with it when it meets an unconfigured bridge,
+	 * and pcirouting installs it only after it has found a $PIR table.
+	 */
+	fmtinstall(L'τ', τfmt);
+
+	/*
+	 * Try to determine if PCI Mode1 configuration implemented.
+	 * (Bits [30:24] of PciADDR must be 0, according to the spec.)
+	 * Mode2 won't appear in 64-bit machines.
+	 */
+	n = inl(PciADDR);
+	if(!(n & 0x7F000000)){
+		outl(PciADDR, 0x80000000);
+		outb(PciADDR+3, 0);
+		if(inl(PciADDR) & 0x80000000)
+			pcicfgmode = 1;
+	}
+	outl(PciADDR, n);		/* put back whatever was in the address port */
+
+	if(pcicfgmode < 0){
+		unlock(&pcicfginitlock);
+		return;
+	}
+
+	/*
+	 * pcilscan returns the highest bus number it reached, so the
+	 * loop resumes after the buses already covered; list is advanced
+	 * to the end of the root chain so the next scan appends to it.
+	 */
+	list = &pciroot;
+	for(bno = 0; bno <= Maxbus; bno++) {
+		bno = pcilscan(bno, list);
+		while(*list)
+			list = &(*list)->link;
+	}
+	pcirouting();
+//	pcireservemem();
+	unlock(&pcicfginitlock);
+
+	if(getconf("*pcihinv"))
+		pcihinv(pciroot, 1);
+}
+
+/*
+ * Read or write w (1, 2 or 4) bytes of configuration register r of
+ * the device named by tbdf, using configuration mechanism #1.
+ * Triggers pcicfginit on first use.  Returns -1 if mechanism #1 is
+ * not available or the device number exceeds Maxdev; otherwise a
+ * read returns the value (or -1 for an unsupported width) and a
+ * write returns 0.
+ */
+static int
+pcicfgrw(int tbdf, int r, int data, int rw, int w)
+{
+	int o, x, er;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+	if(pcicfgmode != 1)
+		return -1;
+	if(BUSDNO(tbdf) > Maxdev)
+		return -1;
+
+	lock(&pcicfglock);
+	/* parses as r & (4-w): the offset of the access within the 32-bit data port */
+	o = r & 4-w;
+	/* dword-aligned register number, with bits 8-11 of r moved up to bits 24-27 */
+	er = r&0xfc | (r & 0xf00)<<16;
+	outl(PciADDR, 0x80000000|BUSBDF(tbdf)|er);
+	if(rw == Read){
+		x = -1;
+		switch(w){
+		case 1:
+			x = inb(PciDATA+o);
+			break;
+		case 2:
+			x = ins(PciDATA+o);
+			break;
+		case 4:
+			x = inl(PciDATA+o);
+			break;
+		}
+	}else{
+		x = 0;
+		switch(w){
+		case 1:
+			outb(PciDATA+o, data);
+			break;
+		case 2:
+			outs(PciDATA+o, data);
+			break;
+		case 4:
+			outl(PciDATA+o, data);
+			break;
+		}
+	}
+//	outl(PciADDR, 0);
+	unlock(&pcicfglock);
+
+	return x;
+}
+
+/* read one byte of configuration register rno of device p */
+int
+pcicfgr8(Pcidev *p, int rno)
+{
+	return pcicfgrw(p->tbdf, rno, 0, Read, 1);
+}
+
+/* write one byte of configuration register rno of device p */
+void
+pcicfgw8(Pcidev *p, int rno, int data)
+{
+	pcicfgrw(p->tbdf, rno, data, Write, 1);
+}
+
+/* read two bytes of configuration register rno of device p */
+int
+pcicfgr16(Pcidev *p, int rno)
+{
+	return pcicfgrw(p->tbdf, rno, 0, Read, 2);
+}
+
+/* write two bytes of configuration register rno of device p */
+void
+pcicfgw16(Pcidev *p, int rno, int data)
+{
+	pcicfgrw(p->tbdf, rno, data, Write, 2);
+}
+
+int
+pcicfgr32(Pcidev *p, int rno)
+{
+
return pcicfgrw(p->tbdf, rno, 0, Read, 4);
+}
+
+/* write four bytes of configuration register rno of device p */
+void
+pcicfgw32(Pcidev *p, int rno, int data)
+{
+	pcicfgrw(p->tbdf, rno, data, Write, 4);
+}
+
+/* clear MemWrInv in the cached command register and write it back to the device */
+void
+pciclrmwi(Pcidev* p)
+{
+	p->pcr &= ~MemWrInv;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+
+/*
+ * Iterate over the flat device list.  Returns the first device after
+ * prev (or from the head of pcilist when prev is nil) whose vendor
+ * and device ids match; a vid or did of 0 matches anything.
+ * Returns nil when the list is exhausted.
+ */
+Pcidev*
+pcimatch(Pcidev* prev, int vid, int did)
+{
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	prev = prev? prev->list: pcilist;
+	for(; prev != nil; prev = prev->list){
+		if((vid == 0 || prev->vid == vid)
+		&& (did == 0 || prev->did == did))
+			break;
+	}
+	return prev;
+}
+
+/* return the device whose tbdf equals the argument, or nil if there is none */
+Pcidev*
+pcimatchtbdf(int tbdf)
+{
+	Pcidev *p;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+
+	for(p = nil; p = pcimatch(p, 0, 0); )
+		if(p->tbdf == tbdf)
+			break;
+	return p;
+}
+
+/*
+ * Name a device class.  The switch is on ccru>>8, so the argument
+ * must carry the base class in bits 8-15 (base class<<8 | sub-class),
+ * not the bare 8-bit sub-class.
+ */
+static char *
+ccru2name(int ccru)
+{
+	switch (ccru>>8) {
+	case 0x01:		/* mass storage controller */
+		return "disks";
+	case 0x02:		/* network controller */
+		return "net";	/* probably ether */
+	case 0x03:		/* display controller */
+		return "video";
+	case 0x04:		/* multimedia device */
+		return "audio";
+	case 0x07:		/* simple communication controllers */
+		return "serial";
+	case 0x08:		/* base system peripherals */
+		return "basic";
+	case 0x09:		/* input devices */
+		return "input";
+	case 0x0A:		/* docking stations */
+		return "dock";
+	case 0x0B:		/* processors */
+		return "cpu";
+	case 0x0C:		/* serial bus controllers */
+		return "usb";
+	case 0x00:
+		return "memct0";
+	case 0x05:		/* memory controller */
+		return "memctl";
+	case 0x06:		/* bridge device */
+		return "bridge";
+	default:
+		return "*gok*";
+	}
+}
+
+/* map a PCI vendor id to a short name; unknown ids yield "*GOK*" */
+static char *
+vid2name(int vid)
+{
+	switch (vid) {
+	case 0x1000:
+		return "ncr";
+	case 0x1002:
+		return "ati";
+	case 0x100b:
+		return "natsemi";
+	case 0x1011:
+		return "dec";
+	case 0x1013:
+		return "cirrus";
+	case 0x1022:
+		return "amd";
+	case 0x1023:
+		return "cyber?";
+	case 0x102b:
+		return "matrox";
+	case 0x102c:
+		return "hiq";
+	case 0x1039:
+		return "sis";
+	case 0x104b:
+		return "mylex";
+	case 0x105a:
+		return "promise";
+	case 0x105d:
+		return "number9";
+	case 0x10a9:
+		return "sgi";
+	case 0x10b7:
+		return "3com";
+	case 0x10c8:
+		return "neomagic";		/* or magicgraph */
+	case 0x10de:
+		return "nvidia";
+	case 0x10ec:
+		return "realtek";
+	case 0x11ab:
+		return "marvell";
+	case 0x11ad:
+		return "(pnic?)";
+	case 0x121a:
+		return "voodoo";
+	case 0x12ae:
+		return "alteon";
+	case 0x1385:
+		return "netgear";
+	case 0x14c1:
+		return "myri";
+	case 0x15ad:
+		return "vmware";
+	case 0x16ec:
+		return "usrobot";
+	case 0x5333:			/* "S" "3".  har, har. */
+		return "s3";
+	case 0x8086:
+		return "intel";
+	default:
+		return "*GOK*";
+	}
+}
+
+/*
+ * Print the hardware inventory for the chain starting at p and,
+ * recursively, for every bus behind a bridge on that chain.
+ * A nil p means the whole tree from pciroot, preceded by a column
+ * header.  Non-zero flags adds class and vendor names.
+ */
+void
+pcihinv(Pcidev* p, uint flags)
+{
+	int i, ccr;
+	Pcidev *t;
+
+	if(p == nil) {
+		p = pciroot;
+		print("bus dev type ");
+		if (flags)
+			print("%7s", "");
+		print("vid ");
+		if (flags)
+			print("%8s", "");
+		print("did intl memory\n");
+	}
+	for(t = p; t != nil; t = t->link) {
+		/*
+		 * ccru is filled from a single byte (the sub-class), so on
+		 * its own it can never select a name in ccru2name; combine
+		 * it with the base class to form the 16-bit class code.
+		 */
+		ccr = t->ccrb<<8 | t->ccru;
+		print("%d.%2d.%d %.4ux", BUSBNO(t->tbdf), BUSDNO(t->tbdf),
+			BUSFNO(t->tbdf), ccr);
+		if (flags)
+			print(" %-6s", ccru2name(ccr));
+		print(" %.4ux", t->vid);
+		if (flags)
+			print(" %-7s", vid2name(t->vid));
+		print(" %.4ux %2d ", t->did, t->intl);
+
+		for(i = 0; i < nelem(p->mem); i++) {
+			if(t->mem[i].size == 0)
+				continue;
+			print("%d:%#P %d ", i,
+				t->mem[i].bar, t->mem[i].size);
+		}
+		if(t->bridge)
+			print("->%d", BUSBNO(t->bridge->tbdf));
+		print("\n");
+	}
+	/* second pass: descend into the bus behind each bridge on this chain */
+	while(p != nil) {
+		if(p->bridge != nil)
+			pcihinv(p->bridge, flags);
+		p = p->link;
+	}
+}
+
+/* clear the bus-master enable of every device on the flat list */
+void
+pcireset(void)
+{
+	Pcidev *p;
+
+	if(pcicfgmode == -1)
+		pcicfginit();
+	for(p = pcilist; p != nil; p = p->list)
+		pciclrbme(p);
+}
+
+/* set MASen (bus master enable) in the cached command register and write it back */
+void
+pcisetbme(Pcidev* p)
+{
+	p->pcr |= MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+/* clear MASen (bus master enable) in the cached command register and write it back */
+void
+pciclrbme(Pcidev* p)
+{
+	p->pcr &= ~MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+int
+pcicap(Pcidev *p, int cap)
+{
+	int i, c, off;
+
+	/* status register bit 4 has capabilities */
+	if((pcicfgr16(p, PciPSR) & 1<<4) == 0)
+		return -1;
+	switch(pcicfgr8(p, PciHDT) & 0x7f){
+	default:
+		return -1;
+	case 0:				/*
etc */ + case 1: /* pci to pci bridge */ + off = 0x34; + break; + case 2: /* cardbus bridge */ + off = 0x14; + break; + } + for(i = 48; i--;){ + off = pcicfgr8(p, off); + if(off < 0x40 || (off & 3)) + break; + off &= ~3; + c = pcicfgr8(p, off); + if(c == 0xff) + break; + if(c == cap) + return off; + off++; + } + return -1; +} + --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,98 @@ +#include "all.h" +#include "apic.h" +#include "sipi.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +#define SIPIHANDLER (KZERO+0x3000) + +void +sipi(void) +{ + Apic *apic; + Mach *mach; + int apicno, i, nproc; + u32int *sipiptr; + uintmem sipipa; + u8int *alloc, *p; + extern void squidboy(int); + + /* + * Move the startup code into place, + * must be aligned properly. + */ + sipipa = mmuphysaddr(SIPIHANDLER); + if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB)) + panic("sipi: invalid sipipa"); + sipiptr = UINT2PTR(SIPIHANDLER); + memmove(sipiptr, sipihandler, sizeof(sipihandler)); + DBG("sipiptr %#p sipipa %#P\n", sipiptr, sipipa); + + /* + * Notes: + * The Universal Startup Algorithm described in the MP Spec. 1.4. + * The data needed per-processor is the sum of the stack, page + * table pages, vsvm page and the Mach page. The layout is similar + * to that described in data.h for the bootstrap processor, but + * with any unused space elided. + */ + nproc = 0; + for(apicno = 0; apicno < Napic; apicno++){ + if((apic = lapiclookup(apicno)) == nil || apic->addr != 0 || apic->machno == 0) + continue; + nproc++; + if(nproc == MACHMAX){ + print("sipi: MACHMAX too small %d\n", nproc); + break; + } + + /* + * NOTE: for now, share the page tables with the + * bootstrap processor, until the lsipi code is worked out, + * so only the Mach and stack portions are used below. 
+ */ + alloc = ialloc(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096); + if(alloc == nil) + continue; + p = alloc+MACHSTKSZ; + + sipiptr[-1] = mmuphysaddr(PTR2UINT(p)); + DBG("p %#p sipiptr[-1] %#ux\n", p, sipiptr[-1]); + + p += 4*PTSZ+4*KiB; + + /* + * Committed. If the AP startup fails, can't safely + * release the resources, who knows what mischief + * the AP is up to. Perhaps should try to put it + * back into the INIT state? + */ + mach = (Mach*)p; + mach->machno = apic->machno; /* NOT one-to-one... */ + mach->splpc = PTR2UINT(squidboy); + mach->apicno = apicno; + mach->stack = PTR2UINT(alloc); + mach->vsvm = alloc+MACHSTKSZ+4*PTSZ; + + p = KADDR(0x467); + *p++ = sipipa; + *p++ = sipipa>>8; + *p++ = 0; + *p = 0; + + nvramwrite(0x0f, 0x0a); + lapicsipi(apicno, sipipa); + + for(i = 0; i < 5000; i += 5){ + if(mach->online) + break; + delay(5); + } + nvramwrite(0x0f, 0x00); + + DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n", + mach, sys->machptr[mach->machno], + apicno, mach->machno, mach->cpumhz); + } +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,52 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" +#include "apic.h" + +#define DBG(...) + +void +squidboy(int apicno) +{ + sys->machptr[m->machno] = m; +// setmachsched(m); + + m->perf.period = 1; + m->cpuhz = sys->machptr[0]->cpuhz; + m->cpumhz = sys->machptr[0]->cpumhz; + + DBG("Hello Squidboy %d %d\n", apicno, m->machno); + + vsvminit(MACHSTKSZ); + apmmuinit(); + if(!lapiconline()) + ndnr(); + fpuinit(); + m->splpc = 0; + m->online = 1; + + /* + * CAUTION: no time sync done, etc. + */ + DBG("Wait for the thunderbirds!\n"); + while(!active.thunderbirdsarego) + ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + + DBG("cpu%d color %d tsc %lld\n", + m->machno, machcolor(m->machno), m->rdtsc); + + /* + * Enable the timer interrupt. 
+ */ +// apictimerenab(); + lapicpri(0); + +//// timersinit(); +//// adec(&active.nbooting); +//// ainc(&active.nonline); + + schedinit(); + panic("cpu%d: apic%d: squidboy returns", m->machno, apicno); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,136 @@ +#include "all.h" +#include "io.h" + +enum { + Paddr= 0x70, /* address port */ + Pdata= 0x71, /* data port */ + + Seconds= 0x00, + Minutes= 0x02, + Hours= 0x04, + Mday= 0x07, + Month= 0x08, + Year= 0x09, + Status= 0x0A, + + Nbcd= 6, +}; + +#define GETBCD(o) ((bcdclock[o]&0xf) + 10*(bcdclock[o]>>4)) +#define PUTBCD(n,o) bcdclock[o] = (n % 10) | (((n / 10) % 10)<<4) + +static Lock rtclock; + +void +setrtc(Timet secs) +{ + Rtc rtc; + uchar bcdclock[Nbcd]; + + sec2rtc(secs, &rtc); + + PUTBCD(rtc.sec, 0); + PUTBCD(rtc.min, 1); + PUTBCD(rtc.hour, 2); + PUTBCD(rtc.mday, 3); + PUTBCD(rtc.mon, 4); + PUTBCD(rtc.year, 5); + + ilock(&rtclock); + outb(Paddr, Seconds); outb(Pdata, bcdclock[0]); + outb(Paddr, Minutes); outb(Pdata, bcdclock[1]); + outb(Paddr, Hours); outb(Pdata, bcdclock[2]); + outb(Paddr, Mday); outb(Pdata, bcdclock[3]); + outb(Paddr, Month); outb(Pdata, bcdclock[4]); + outb(Paddr, Year); outb(Pdata, bcdclock[5]); + iunlock(&rtclock); +} + +static ulong +_rtctime(void) +{ + uchar bcdclock[Nbcd]; + Rtc rtc; + int i; + + /* don't do the read until the clock is no longer busy */ + for(i = 0; i < 10000; i++){ + outb(Paddr, Status); + if(inb(Pdata) & 0x80) + continue; + + /* read clock values */ + outb(Paddr, Seconds); bcdclock[0] = inb(Pdata); + outb(Paddr, Minutes); bcdclock[1] = inb(Pdata); + outb(Paddr, Hours); bcdclock[2] = inb(Pdata); + outb(Paddr, Mday); bcdclock[3] = inb(Pdata); + outb(Paddr, Month); bcdclock[4] = inb(Pdata); + outb(Paddr, Year); bcdclock[5] = inb(Pdata); + + outb(Paddr, Status); + if((inb(Pdata) & 0x80) == 0) + break; + } + + /* + * convert from BCD + */ + rtc.sec = GETBCD(0); + rtc.min = GETBCD(1); + rtc.hour = GETBCD(2); + 
rtc.mday = GETBCD(3); + rtc.mon = GETBCD(4); + rtc.year = GETBCD(5); + + /* + * the world starts jan 1 1970 + */ + if(rtc.year < 70) + rtc.year += 2000; + else + rtc.year += 1900; + return rtc2sec(&rtc); +} + +Timet +rtctime(void) +{ + int i; + Timet t, ot; + + ilock(&rtclock); + + /* loop till we get two reads in a row the same */ + t = _rtctime(); + for(i = 0; i < 100; i++){ + ot = t; + t = _rtctime(); + if(ot == t) + break; + } + iunlock(&rtclock); + + return t; +} + +uchar +nvramread(int addr) +{ + uchar data; + + ilock(&rtclock); + outb(Paddr, addr); + data = inb(Pdata); + iunlock(&rtclock); + + return data; +} + +void +nvramwrite(int addr, uchar data) +{ + ilock(&rtclock); + outb(Paddr, addr); + outb(Pdata, data); + iunlock(&rtclock); +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,539 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" +#include "apic.h" + +#define iprint(...) print(__VA_ARGS__) +#define up u +#define xalloc(x) ialloc(x, 0) + +static void debugbpt(Ureg*, void*); +static void faultamd64(Ureg*, void*); +static void doublefault(Ureg*, void*); +static void unexpected(Ureg*, void*); +static void expected(Ureg*, void*); +static void dumpstackwithureg(Ureg*); + +static Lock vctllock; +/*static*/ Vctl *vctl[256]; + +typedef struct Intrtime Intrtime; +struct Intrtime { + uvlong count; + uvlong cycles; +}; +static Intrtime intrtimes[256]; +static Intrtime machtimes[MACHMAX]; + +static int trapinited; +extern int ioapicintrenable(Vctl*); + +int +intraffinity(void *vv) +{ + Vctl *v; + + v = vv; + return v->affinity; +} + +void* +intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) +{ + int vno; + Vctl *v; + extern int ioapicintrenable(Vctl*); + + if(f == nil){ + print("intrenable: nil handler for %d, tbdf %#ux for %s\n", + irq, tbdf, name); + return nil; + } + + v = ialloc(sizeof(Vctl), 0); + v->isintr = 1; + v->irq = irq; + v->tbdf = tbdf; + v->affinity = -1; + v->f 
= f;
+	v->a = a;
+	strncpy(v->name, name, NAMELEN-1);
+	v->name[NAMELEN-1] = 0;
+
+	ilock(&vctllock);
+	vno = ioapicintrenable(v);
+	if(vno == -1){
+		iunlock(&vctllock);
+		print("intrenable: couldn't enable irq %d, tbdf %#ux for %s\n",
+			irq, tbdf, v->name);
+		// free(v);
+		return nil;
+	}
+	if(vno >= nelem(vctl))
+		panic("vno: %d\n", vno);
+	/*
+	 * A vector may be shared only by handlers that agree on isr
+	 * and eoi.  Index with the vno just returned and range-checked:
+	 * v->vno is not assigned until after this test.
+	 */
+	if(vctl[vno] != nil){
+		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
+			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
+				vctl[vno]->name, v->name,
+				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
+	}
+	/* push v on the front of the chain for this vector */
+	v->vno = vno;
+	v->next = vctl[vno];
+	vctl[vno] = v;
+	iunlock(&vctllock);
+
+	if(v->mask)
+		v->mask(v, 0);
+
+	/*
+	 * Return the assigned vector so intrdisable can find
+	 * the handler; the IRQ is useless in the wonderful world
+	 * of the IOAPIC.
+	 */
+	return v;
+}
+
+/*
+ * Register f(ureg, a) as the handler for trap vector vno under
+ * the given name.  Panics if vno is outside the vctl table, or
+ * if the vector already had a handler (traps are not chained).
+ */
+void
+trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl *v;
+
+	if(vno < 0 || vno >= nelem(vctl))
+		panic("trapenable: vno %d", vno);
+	v = xalloc(sizeof(Vctl));
+	v->type = "trap";
+	v->tbdf = BUSUNKNOWN;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, NAMELEN);
+	v->name[NAMELEN-1] = 0;		/* strncpy does not terminate a full-length copy */
+
+	ilock(&vctllock);
+	v->next = vctl[vno];
+	vctl[vno] = v;
+	iunlock(&vctllock);
+
+	if(v->next)
+		panic("trapenable: chained trap %d %s from %#p\n",
+			vno, name, getcallerpc(&vno));
+}
+
+/*
+ * Re-arm NMI delivery via ports 0x70 and 0x61; see the inline notes
+ * for what each write does.  Called from trapinit, and from trap
+ * when an NMI arrives with no handler registered.
+ */
+static void
+nmienable(void)
+{
+	int x;
+
+	/*
+	 * Hack: should be locked with NVRAM access.
+	 */
+	outb(0x70, 0x80);		/* NMI latch clear */
+	outb(0x70, 0);
+
+	x = inb(0x61) & 0x07;		/* Enable NMI */
+	outb(0x61, 0x08|x);
+	outb(0x61, x);
+}
+
+void
+trapinit(void)
+{
+	/*
+	 * Special traps.
+	 * Syscall() is called directly without going through trap().
+ */ + trapenable(VectorBPT, debugbpt, 0, "#BP"); + trapenable(VectorPF, faultamd64, 0, "#PF"); + trapenable(Vector2F, doublefault, 0, "#DF"); + trapenable(Vector15, unexpected, 0, "#15"); + trapenable(IdtIPI, expected, 0, "#IPI"); + nmienable(); + + trapinited = 1; +} + +static char* excname[32] = { + "#DE", /* Divide-by-Zero Error */ + "#DB", /* Debug */ + "#NMI", /* Non-Maskable-Interrupt */ + "#BP", /* Breakpoint */ + "#OF", /* Overflow */ + "#BR", /* Bound-Range */ + "#UD", /* Invalid-Opcode */ + "#NM", /* Device-Not-Available */ + "#DF", /* Double-Fault */ + "#9 (reserved)", + "#TS", /* Invalid-TSS */ + "#NP", /* Segment-Not-Present */ + "#SS", /* Stack */ + "#GP", /* General-Protection */ + "#PF", /* Page-Fault */ + "#15 (reserved)", + "#MF", /* x87 FPE-Pending */ + "#AC", /* Alignment-Check */ + "#MC", /* Machine-Check */ + "#XF", /* SIMD Floating-Point */ + "#20 (reserved)", + "#21 (reserved)", + "#22 (reserved)", + "#23 (reserved)", + "#24 (reserved)", + "#25 (reserved)", + "#26 (reserved)", + "#27 (reserved)", + "#28 (reserved)", + "#29 (reserved)", + "#30 (reserved)", + "#31 (reserved)", +}; + +/* + * keep interrupt service times and counts + */ +void +intrtime(int vno) +{ + uvlong diff, x; + Intrtime *t; + + x = rdtsc(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + t = intrtimes + vno; + t->cycles += diff; + t->count++; + t = machtimes + m->machno; + t->cycles += diff; + t->count++; +} + +/* + * All traps come here. It is slower to have all traps call trap() + * rather than directly vectoring the handler. However, this avoids a + * lot of code duplication and possible bugs. The only exception is + * VectorSYSCALL. + * Trap is called with interrupts disabled via interrupt-gates. 
+ */ +void +trap(Ureg* ureg) +{ + int clockintr, vno; + Vctl *ctl, *v; + + if(!trapinited){ + /* fault can give a better error message */ + if(ureg->type == VectorPF) + faultamd64(ureg, nil); + if(ureg->type < nelem(excname)) + panic("trap %llud: %s: not ready %#p", ureg->type, excname[ureg->type], getcr2()); + else + panic("trap %llud: not ready", ureg->type); + } + + m->perf.intrts = rdtsc(); + clockintr = 0; + + vno = ureg->type; + if(ctl = vctl[vno]){ + if(ctl->isintr){ + // m->intr++; + if(vno >= VectorPIC && vno != VectorSYSCALL) + m->lastintr = ctl->irq; + } + + if(ctl->isr) + ctl->isr(vno); + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + if(ctl->eoi) + ctl->eoi(vno); + + if(ctl->isintr){ + intrtime(vno); + + if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) + clockintr = 1; + + // if(up && !clockintr) + // preempted(); + } + } + else if(vno >= VectorPIC && vno != VectorSYSCALL){ + /* + * An unknown interrupt. + * Check for a default IRQ7. This can happen when + * the IRQ input goes away before the acknowledge. + * In this case, a 'default IRQ7' is generated, but + * the corresponding bit in the ISR isn't set. + * In fact, just ignore all such interrupts. + */ + + /* clear the interrupt */ + // i8259isr(vno); +lapiceoi(0); + print("cpu%d: spurious interrupt %d, last %d\n", + m->machno, vno, m->lastintr); + m->spuriousintr++; + intrtime(vno); + return; + } + else{ + if(vno == VectorNMI){ + nmienable(); + if(m->machno != 0){ + iprint("cpu%d: PC %#p\n", + m->machno, ureg->ip); + for(;;); + } + } + dumpregs(ureg); + if(vno < nelem(excname)) + panic("%s", excname[vno]); + panic("unknown trap/intr: %d", vno); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + sched(); + splhi(); + } +} + +void +trapstats(void) +{ +} + +/* + * Fill in enough of Ureg to get a stack trace, and call a function. + * Used by debugging interface rdb. 
+ */ +void +callwithureg(void (*fn)(Ureg*)) +{ + Ureg ureg; + ureg.ip = getcallerpc(&fn); + ureg.sp = PTR2UINT(&fn); + fn(&ureg); +} + +static void +dumpstackwithureg(Ureg* ureg) +{ + char *s; + uintptr l, v, i, estack; + extern ulong etext; /* ahem */ + + if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){ + iprint("dumpstack disabled\n"); + return; + } + iprint("dumpstack\n"); +prflush(); + iprint("ktrace 9%s %#p %#p\n", "iveyfs", ureg->ip, ureg->sp); + i = 0; + if(u != nil + && (uintptr)&l >= (uintptr)u->stack + && (uintptr)&l <= (uintptr)u->stack+MAXSTACK) + estack = (uintptr)u->stack+KSTACK; + /* botch — where's the mach stack!? */ + else if((uintptr)&l >= m->stack && (uintptr)&l <= m->stack+MACHSTKSZ) + estack = m->stack+MACHSTKSZ; + else{ + if(u != nil) + iprint("&u->stack %#p &l %#p\n", u->stack, &l); + else + iprint("&m %#p &l %#p\n", m, &l); + return; + } + iprint("estackx %#p\n", estack); +prflush(); + for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ + v = *(uintptr*)l; + if((KTZERO < v && v < (uintptr)&etext) + || ((uintptr)&l < v && v < estack) || estack-l < 256){ + iprint("%#16.16p=%#16.16p ", l, v); + i++; + } + if(i == 2){ + i = 0; + iprint("\n"); + prflush(); + delay(10); + } + } + if(i) + iprint("\n"); +prflush(); +} + +void +dumpstack(User *) +{ + callwithureg(dumpstackwithureg); +} + +void +dumpstack_(void) +{ + callwithureg(dumpstackwithureg); +} + +static void +debugbpt(Ureg*, void*) +{ + panic("kernel bpt"); +} + +void dumpgpr(Ureg* ureg); +static void +doublefault(Ureg *ureg, void*) +{ + dumpgpr(ureg); + panic("cpu%d: double fault: pc %#p", m->machno, ureg->ip); +} + +static void +unexpected(Ureg* ureg, void*) +{ + iprint("cpu%d: unexpected trap %llud; ignoring\n", m->machno, ureg->type); +} + +static void +expected(Ureg*, void*) +{ +} + +static void +faultamd64(Ureg* ureg, void*) +{ + int pid; + u64int addr; + + addr = getcr2(); + pid = -1; + if(up) + pid = up->pid; + panic("cpu%d: fault with up %d; pc %#p addr %#p\n", 
m->machno, pid, ureg->ip, addr); +} + +/* + * Dump general registers. + */ +void +dumpgpr(Ureg* ureg) +{ + if(up != nil) + iprint("cpu%d: registers for %s %d\n", + m->machno, up->text, up->pid); + else + iprint("cpu%d: registers for kernel\n", m->machno); + + iprint("ax\t%#16.16llux\n", ureg->ax); + iprint("bx\t%#16.16llux\n", ureg->bx); + iprint("cx\t%#16.16llux\n", ureg->cx); + iprint("dx\t%#16.16llux\n", ureg->dx); + iprint("di\t%#16.16llux\n", ureg->di); + iprint("si\t%#16.16llux\n", ureg->si); + iprint("bp\t%#16.16llux\n", ureg->bp); + iprint("r8\t%#16.16llux\n", ureg->r8); + iprint("r9\t%#16.16llux\n", ureg->r9); + iprint("r10\t%#16.16llux\n", ureg->r10); + iprint("r11\t%#16.16llux\n", ureg->r11); + iprint("r12\t%#16.16llux\n", ureg->r12); + iprint("r13\t%#16.16llux\n", ureg->r13); + iprint("r14\t%#16.16llux\n", ureg->r14); + iprint("r15\t%#16.16llux\n", ureg->r15); + iprint("ds %#4.4ux es %#4.4ux fs %#4.4ux gs %#4.4ux\n", + ureg->ds, ureg->es, ureg->fs, ureg->gs); + iprint("ureg fs\t%#ux\n", *(unsigned int *)&ureg->ds); + iprint("type\t%#llux\n", ureg->type); + iprint("error\t%#llux\n", ureg->error); + iprint("pc\t%#llux\n", ureg->ip); + iprint("cs\t%#llux\n", ureg->cs); + iprint("flags\t%#llux\n", ureg->flags); + iprint("sp\t%#llux\n", ureg->sp); + iprint("ss\t%#llux\n", ureg->ss); + iprint("type\t%#llux\n", ureg->type); +// iprint("FS\t%#llux\n", rdmsr(FSbase)); +// iprint("GS\t%#llux\n", rdmsr(GSbase)); + + iprint("m\t%#16.16p\nup\t%#16.16p\n", m, up); +} + +void +dumpregs(Ureg* ureg) +{ + dumpgpr(ureg); + + /* + * Processor control registers. + * If machine check exception, time stamp counter, page size extensions + * or enhanced virtual 8086 mode extensions are supported, there is a + * CR4. If there is a CR4 and machine check extensions, read the machine + * check address and machine check type registers if RDMSR supported. 
+ */ + iprint("cr0\t%#16.16llux\n", getcr0()); + iprint("cr2\t%#16.16llux\n", getcr2()); + iprint("cr3\t%#16.16llux\n", getcr3()); + +// archdumpregs(); +} + +void +cmd_machvec(int, char**) +{ + int i; + Mach *mach; + Intrtime *t; + + print("%s %-10s %-18s\n", "mach", "count", "cycles"); + prflush(); + + for(i = 0; i < conf.nmach; i++){ + if((mach = sys->machptr[i]) == nil) + continue; + t = machtimes + i; + print("%d/%d %-10lld %-18lld\n", i, mach->machno, t->count, t->cycles); + prflush(); + } +} + +/*static*/ void +cmd_vec(int argc, char **argv) +{ + char aff[8], tbdf[16], *prefix; + int i; + Vctl *v; + Intrtime *t; + + USED(argc, argv); + print("%s %s %-16s %-10s %-18s %s %s\n", + "vec", "aff", "tbdf", "count", "cycles", "type", "name"); + prflush(); + + for(i = 0; i < 256; i++){ + prefix = ""; + t = intrtimes + i; + for(v = vctl[i]; v != nil; v = v->next){ + if(strcmp(v->type, "trap") == 0 || strcmp(v->type, "lapic") == 0){ + snprint(aff, sizeof aff, "--"); + snprint(tbdf, sizeof tbdf, "--"); + } + else{ + snprint(aff, sizeof aff, "%d", v->affinity); + snprint(tbdf, sizeof tbdf, "%τ", v->tbdf); + } + print("%s%d %s %-16s %-10lld %-18lld %s %s\n", + prefix, i, aff, tbdf, t->count, t->cycles, v->type, v->name); + prflush(); + prefix = "*"; + } + } +} --- /sys/src/fs/amd64 Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64 Tue Aug 27 15:55:01 2013 @@ -0,0 +1,189 @@ +/* + * Vestigial Segmented Virtual Memory. + * To do: + * dynamic allocation and free of descriptors; + * IST should perhaps point to a different handler; + * user-level descriptors (if not dynamic). + */ +#include "all.h" + +#include "amd64.h" +#include "ureg.h" + +typedef struct Gd Gd; +typedef u64int Sd; +typedef u16int Ss; +typedef struct Tss Tss; + +struct Gd { + Sd sd; + u64int hi; +}; + +struct Tss { + u32int _0_; + u32int rsp0[2]; + u32int rsp1[2]; + u32int rsp2[2]; + u32int _28_[2]; + u32int ist[14]; + u16int _92_[5]; + u16int iomap; +}; + +enum { + Ngdt = 16, /* max. 
entries in gdt */ + Nidt = 256, /* max. entries in idt */ +}; + +static Sd gdt64[Ngdt] = { + 0ull, /* NULL descriptor */ + SdL|SdP|SdDPL0|SdS|SdCODE, /* CS */ + SdG|SdD|SdP|SdDPL0|SdS|SdW, /* DS */ + SdG|SdD|SdP|SdDPL3|SdS|SdCODE|SdR|Sd4G, /* User CS 32-bit */ + SdG|SdD|SdP|SdDPL3|SdS|SdW|Sd4G, /* User DS */ + SdL|SdP|SdDPL3|SdS|SdCODE, /* User CS 64-bit */ + + 0ull, /* FS */ + 0ull, /* GS */ + + 0ull, /* TSS lower */ + 0ull, /* TSS upper */ +}; +static int ngdt64 = 10; + +static Gd idt64[Nidt]; + +static Sd +mksd(u64int base, u64int limit, u64int bits, u64int* upper) +{ + Sd sd; + + sd = bits; + sd |= (((limit & 0x00000000000f0000ull)>>16)<<48) + |(limit & 0x000000000000ffffull); + sd |= (((base & 0x00000000ff000000ull)>>24)<<56) + |(((base & 0x0000000000ff0000ull)>>16)<<32) + |((base & 0x000000000000ffffull)<<16); + if(upper != nil) + *upper = base>>32; + + return sd; +} + +static void +mkgd(Gd* gd, u64int offset, Ss ss, u64int bits, int ist) +{ + Sd sd; + + sd = bits; + sd |= (((offset & 0x00000000ffff0000ull)>>16)<<48) + |(offset & 0x000000000000ffffull); + sd |= ((ss & 0x000000000000ffffull)<<16); + sd |= (ist & (SdISTM>>32))<<32; + gd->sd = sd; + gd->hi = offset>>32; +} + +static void +idtinit(Gd *gd, uintptr offset) +{ + int ist, v; + u64int dpl; + + for(v = 0; v < Nidt; v++){ + ist = 0; + dpl = SdP|SdDPL0|SdIG; + switch(v){ + default: + break; + case IdtBP: /* #BP */ + dpl = SdP|SdDPL3|SdIG; + break; + case IdtDF: /* #DF */ + ist = 1; + break; + } + mkgd(gd, offset, SSEL(SiCS, SsTIGDT|SsRPL0), dpl, ist); + gd++; + offset += 6; + } +} + +void +tssrsp0(uintptr sp) +{ + Tss *tss; + + tss = m->tss; + tss->rsp0[0] = sp; + tss->rsp0[1] = sp>>32; +} + +static void +tssinit(uintptr sp) +{ + int ist; + Tss *tss; + + tss = m->tss; + memset(tss, 0, sizeof(Tss)); + + tssrsp0(sp); + + sp = PTR2UINT(m->vsvm+PGSZ); + for(ist = 0; ist < 14; ist += 2){ + tss->ist[ist] = sp; + tss->ist[ist+1] = sp>>32; + } + tss->iomap = 0xdfff; +} + +static void +syscallentry(void) +{ + 
print("syscall: *gok*\n"); +} + +void +vsvminit(int size) +{ + Sd *sd; + u64int r; + + if(m->machno == 0){ + idtinit(idt64, PTR2UINT(idthandlers)); + } + + m->gdt = m->vsvm; + memmove(m->gdt, gdt64, sizeof(gdt64)); + m->tss = &m->vsvm[ROUNDUP(sizeof(gdt64), 16)]; + + sd = &((Sd*)m->gdt)[SiTSS]; + *sd = mksd(PTR2UINT(m->tss), sizeof(Tss)-1, SdP|SdDPL0|SdaTSS, sd+1); + + tssinit(m->stack+size); + + gdtput(sizeof(gdt64)-1, PTR2UINT(m->gdt), SSEL(SiCS, SsTIGDT|SsRPL0)); + idtput(sizeof(idt64)-1, PTR2UINT(idt64)); + trput(SSEL(SiTSS, SsTIGDT|SsRPL0)); + + wrmsr(FSbase, 0ull); + wrmsr(GSbase, PTR2UINT(&sys->machptr[m->machno])); + wrmsr(KernelGSbase, 0ull); + + r = rdmsr(Efer); + r |= Sce; + wrmsr(Efer, r); + r = ((u64int)SSEL(SiU32CS, SsRPL3))<<48; + r |= ((u64int)SSEL(SiCS, SsRPL0))<<32; + wrmsr(Star, r); + wrmsr(Lstar, PTR2UINT(syscallentry)); + wrmsr(Sfmask, If); +} + +//int +//userureg(Ureg* ureg) +//{ +// return ureg->cs == SSEL(SiUCS, SsRPL3); +//} --- /sys/src/fs/amd64/8250.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/8250.c Tue Aug 27 16:10:44 2013 @@ -0,0 +1,402 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" + +enum { + Development = 1, /* i.e., debugging */ + DLE = 0x10, /* ^p == DLE */ + Asciimask = 0x7f, +}; + +/* + * INS8250 uart + */ +enum +{ + /* + * register numbers + */ + Data= 0, /* xmit/rcv buffer */ + Iena= 1, /* interrupt enable */ + Ircv= (1<<0), /* for char rcv'd */ + Ixmt= (1<<1), /* for xmit buffer empty */ + Irstat=(1<<2), /* for change in rcv'er status */ + Imstat=(1<<3), /* for change in modem status */ + Istat= 2, /* interrupt flag (read) */ + Fenabd=(3<<6), /* on if fifo's enabled */ + Fifoctl=2, /* fifo control (write) */ + Fena= (1<<0), /* enable xmit/rcv fifos */ + Ftrig= (1<<6), /* trigger after 4 input characters */ + Fclear=(3<<1), /* clear xmit & rcv fifos */ + Format= 3, /* byte format */ + Bits8= (3<<0), /* 8 bits/byte */ + Stop2= (1<<2), /* 2 stop bits */ + Pena= (1<<3), /* generate parity */ + Peven= (1<<4), /* even 
parity */ + Pforce=(1<<5), /* force parity */ + Break= (1<<6), /* generate a break */ + Dra= (1<<7), /* address the divisor */ + Mctl= 4, /* modem control */ + Dtr= (1<<0), /* data terminal ready */ + Rts= (1<<1), /* request to send */ + Ri= (1<<2), /* ring */ + Inton= (1<<3), /* turn on interrupts */ + Loop= (1<<4), /* loop back */ + Lstat= 5, /* line status */ + Inready=(1<<0), /* receive buffer full */ + Oerror=(1<<1), /* receiver overrun */ + Perror=(1<<2), /* receiver parity error */ + Ferror=(1<<3), /* rcv framing error */ + Outready=(1<<5), /* output buffer empty */ + Mstat= 6, /* modem status */ + Ctsc= (1<<0), /* clear to send changed */ + Dsrc= (1<<1), /* data set ready changed */ + Rire= (1<<2), /* rising edge of ring indicator */ + Dcdc= (1<<3), /* data carrier detect changed */ + Cts= (1<<4), /* complement of clear to send line */ + Dsr= (1<<5), /* complement of data set ready line */ + Ring= (1<<6), /* complement of ring indicator line */ + Dcd= (1<<7), /* complement of data carrier detect line */ + Scratch=7, /* scratchpad */ + Dlsb= 0, /* divisor lsb */ + Dmsb= 1, /* divisor msb */ + + Serial= 0, + Modem= 1, +}; + +typedef struct Uart Uart; +struct Uart +{ + int port; + uchar sticky[8]; /* sticky write register values */ + int nofifo; + + void (*rx)(int); /* routine to take a received character */ + int (*tx)(void); /* routine to get a character to transmit */ + + uint frame; + uint overrun; +}; + +/* externally-visible console-on-a-uart flag */ +int uartcons; + +Uart uart[2]; + +#define UartFREQ 1843200 + +#define uartwrreg(u,r,v) outb((u)->port + r, (u)->sticky[r] | (v)) +#define uartrdreg(u,r) inb((u)->port + r) + +/* + * set the baud rate by calculating and setting the baudrate + * generator constant. This will work with fairly non-standard + * baud rates. 
+ */ +static void +uartsetbaud(Uart *up, int rate) +{ + uint brconst; + + brconst = (UartFREQ+8*rate-1)/(16*rate); + + uartwrreg(up, Format, Dra); + outb(up->port+Dmsb, (brconst>>8) & 0xff); + outb(up->port+Dlsb, brconst & 0xff); + uartwrreg(up, Format, 0); +} + +/* + * toggle DTR + */ +static void +uartdtr(Uart *up, int n) +{ + if(n) + up->sticky[Mctl] |= Dtr; + else + up->sticky[Mctl] &= ~Dtr; + uartwrreg(up, Mctl, 0); +} + +/* + * toggle RTS + */ +static void +uartrts(Uart *up, int n) +{ + if(n) + up->sticky[Mctl] |= Rts; + else + up->sticky[Mctl] &= ~Rts; + uartwrreg(up, Mctl, 0); +} + +/* + * Enable/disable FIFOs (if possible). + */ +static void +uartfifo(Uart *up, int n) +{ + int i; + Mpl s; + + if(up->nofifo) + return; + + s = splhi(); + + /* reset fifos */ + uartwrreg(up, Fifoctl, Fclear); + + /* empty buffer and interrupt conditions */ + for(i = 0; i < 16; i++){ + uartrdreg(up, Istat); + uartrdreg(up, Data); + } + + /* turn on fifo */ + if(n){ + uartwrreg(up, Fifoctl, Fena|Ftrig); + + if((uartrdreg(up, Istat) & Fenabd) == 0){ + /* didn't work, must be an earlier chip type */ + up->nofifo = 1; + } + } + + splx(s); +} + +static int +uartshift(void) +{ + return -1; +} + +static void +uartintr(Ureg *ur, void *arg) +{ + Uart *up; + int ch; + int s, l, loops; + + USED(ur); + + up = arg; + for(loops = 0; loops < 1024; loops++){ + s = uartrdreg(up, Istat); + switch(s & 0x3F){ + case 6: /* receiver line status */ + l = uartrdreg(up, Lstat); + if(l & Ferror) + up->frame++; + if(l & Oerror) + up->overrun++; + break; + + case 4: /* received data available */ + case 12: + ch = inb(up->port+Data); + if (Development && (ch & Asciimask) == DLE) + firmware(); + if(up->rx) + (*up->rx)(ch & Asciimask); + break; + + case 2: /* transmitter empty */ + ch = -1; + if(up->tx) + ch = (*up->tx)(); + if(ch != -1) + outb(up->port+Data, ch); + break; + + case 0: /* modem status */ + uartrdreg(up, Mstat); + break; + + default: + if(s&1) + return; + print("weird modem interrupt 
#%2.2ux\n", s); + break; + } + } + panic("uartintr: 0x%2.2ux\n", uartrdreg(up, Istat)); +} + +/* + * turn on a port's interrupts. set DTR and RTS + */ +static void +uartenable(Uart *up) +{ + /* + * turn on interrupts + */ + up->sticky[Iena] = 0; + if(up->tx) + up->sticky[Iena] |= Ixmt; + if(up->rx) + up->sticky[Iena] |= Ircv|Irstat; + + /* + * turn on DTR and RTS + */ + uartdtr(up, 1); + uartrts(up, 1); + uartfifo(up, 1); + + uartwrreg(up, Iena, 0); +} + +void +uartspecial(int port, void (*rx)(int), int (*tx)(void), int baud) +{ + Uart *up = &uart[0]; + + if(up->port) + return; + + switch(port){ + case 0: + up->port = 0x3F8; + intrenable(IrqUART0, uartintr, up, BUSUNKNOWN, "eia0"); + break; + + case 1: + up->port = 0x2F8; + intrenable(IrqUART1, uartintr, up, BUSUNKNOWN, "eia1"); + break; + + default: + return; + } + + /* + * set rate to 115200 baud. + * 8 bits/character. + * 1 stop bit. + * interrupts enabled. + */ + uartsetbaud(up, 115200); + up->sticky[Format] = Bits8; + uartwrreg(up, Format, 0); + up->sticky[Mctl] |= Inton; + uartwrreg(up, Mctl, 0x0); + + if(tx == nil) + tx = uartshift; + + up->rx = rx; + up->tx = tx; + uartenable(up); + if(baud) + uartsetbaud(up, baud); + uartcons = 1; +} + +int +uartgetc(void) +{ + Uart *up = &uart[0]; + + if(conf.useuart && uartrdreg(up, Lstat) & Inready) + return inb(up->port+Data); + return 0; +} + +void +uartputc(int c) +{ + Uart *up = &uart[0]; + int i; + + if(conf.useuart == 0) + return; + + for(i = 0; i < 100; i++){ + if(uartrdreg(up, Lstat) & Outready) + break; + microdelay(100); + } + outb(up->port+Data, c); +} + +void +uartputs(char *s, int n) +{ + int i; + + for(i = 0; i < n; i++) + uartputc(s[i]); +} + +void +uartspecial1(int port, void (*rx)(int), int (*tx)(void), int baud) +{ + Uart *up = &uart[1]; + + if(up->port) + return; + + switch(port){ + + case 0: + up->port = 0x3F8; + intrenable(IrqUART0, uartintr, up, BUSUNKNOWN, "eia0"); + break; + + case 1: + up->port = 0x2F8; + intrenable(IrqUART1, uartintr, up, 
BUSUNKNOWN, "eia1"); + break; + + default: + return; + } + + /* + * set rate to 9600 baud. + * 8 bits/character. + * 1 stop bit. + * interrupts enabled. + */ + uartsetbaud(up, 9600); + up->sticky[Format] = Bits8; + uartwrreg(up, Format, 0); + up->sticky[Mctl] |= Inton; + uartwrreg(up, Mctl, 0x0); + + up->rx = rx; + up->tx = tx; + uartenable(up); + if(baud) + uartsetbaud(up, baud); +} + +int +uartgetc1(void) +{ + Uart *up = &uart[1]; + + if(uartrdreg(up, Lstat) & Inready) + return inb(up->port+Data); + return 0; +} + +void +uartputc1(int c) +{ + Uart *up = &uart[1]; + int i; + + for(i = 0; i < 100; i++){ + if(uartrdreg(up, Lstat) & Outready) + break; + delay(1); + } + outb(up->port+Data, c); +} --- /sys/src/fs/amd64/l32p.s Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/l32p.s Tue Aug 27 16:10:44 2013 @@ -0,0 +1,230 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $32 + +#define pFARJMP32(s, o) BYTE $0xea; /* far jump to ptr32:16 */\ + LONG $o; WORD $s + +/* + * Enter here in 32-bit protected mode. Welcome to 1982. + * Make sure the GDT is set as it should be: + * disable interrupts; + * load the GDT with the table in _gdt32p; + * load all the data segments + * load the code segment via a far jump. 
+ */ +TEXT _protected<>(SB), 1, $-4 + CLI + BYTE $0xe9; LONG $0x4c; /* JMP _endofheader */ + +_startofheader: + BYTE $0x90 /* NOP */ + BYTE $0x90 /* NOP */ + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(2*8-1) + QUAD $_gdt64<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +_endofheader: + MOVL AX, BP /* possible passed-in magic */ + + MOVL $_gdtptr32p<>-KZERO(SB), AX + MOVL (AX), GDTR + + MOVL $SSEL(SiDS, SsTIGDT|SsRPL0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + pFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _warp64<>-KZERO(SB)) + +/* + * Make the basic page tables for CPU0 to map 0-4MiB physical + * to KZERO, and include an identity map for the switch from protected + * to paging mode. There's an assumption here that the creation and later + * removal of the identity map will not interfere with the KZERO mappings; + * the conditions for clearing the identity map are + * clear PML4 entry when (KZERO & 0x0000ff8000000000) != 0; + * clear PDP entry when (KZERO & 0x0000007fc0000000) != 0; + * don't clear PD entry when (KZERO & 0x000000003fe00000) == 0; + * the code below assumes these conditions are met. + * + * Assume a recent processor with Page Size Extensions + * and use two 2MiB entries. 
+ */ +/* + * The layout is described in dat.h: + * - MACHSTKSZ stack + * - PTSZ PT for PMAPADDR unused - assumes in KZERO PD + * - PTSZ PD + * - PTSZ PDP + * - PTSZ PML4 + * - 4*KiB vsvmpage for gdt, tss + * - MACHSZ m + * - 4*KiB syspage + * - 4*KiB ptrpage + * - 4*KiB unused + * - 4*KiB unused + * _protected: start of kernel text + */ + +/* + * Macros for accessing page table entries; change the + * C-style array-index macros into a page table byte offset + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _warp64<>(SB), 1, $-4 + MOVL $_protected<>-(MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ+KZERO)(SB), SI + + MOVL SI, DI + XORL AX, AX + MOVL $((MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ)>>2), CX + + CLD + REP; STOSL /* stack, P*, vsvm, m, sys */ + + MOVL SI, AX /* sys-KZERO */ + ADDL $(MACHSTKSZ), AX /* PML4 */ + MOVL AX, CR3 /* load the mmu */ + MOVL AX, DX + ADDL $(PTSZ|PteRW|PteP), DX /* PDP at PML4 + PTSZ */ + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + MOVL DX, PML4O(KZERO)(AX) /* PML4E for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PDP at PML4 + PTSZ */ + ADDL $PTSZ, DX /* PD at PML4 + 2*PTSZ */ + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL DX, PDPO(KZERO)(AX) /* PDPE for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + MOVL $(PtePS|PteRW|PteP), DX + MOVL DX, PDO(0)(AX) /* PDE for identity 0-2MiB */ + + MOVL AX, CX + ADDL $PDO(KZERO), CX +memloop: + MOVL DX, 0(CX) + ADDL $PGLSZ(1), DX + ADDL $8, CX + CMPL DX, $INIMAP + JLT memloop + + MOVL AX, DX /* PD at PML4 + 2*PTSZ */ + ADDL $(PTSZ|PteRW|PteP), DX /* PT at PML4 + 3*PTSZ */ + MOVL DX, PDO(PMAPADDR)(AX) /* PDE for PMAPADDR */ + +/* + * Enable and activate Long Mode. 
From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* sys-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX + MOVQ AX, sys(SB) /* sys */ + + ADDQ $(MACHSTKSZ), AX /* PML4 and top of stack */ + MOVQ AX, SP /* set stack */ + +_zap0pml4: + CMPQ DX, $PML4O(KZERO) /* KZERO & 0x0000ff8000000000 */ + JEQ _zap0pdp + MOVQ DX, PML4O(0)(AX) /* zap identity map PML4E */ +_zap0pdp: + ADDQ $PTSZ, AX /* PDP at PML4 + PTSZ */ + CMPQ DX, $PDPO(KZERO) /* KZERO & 0x0000007fc0000000 */ + JEQ _zap0pd + MOVQ DX, PDPO(0)(AX) /* zap identity map PDPE */ +_zap0pd: + ADDQ $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + CMPQ DX, $PDO(KZERO) /* KZERO & 0x000000003fe00000 */ + JEQ _zap0done + MOVQ DX, PDO(0)(AX) /* zap identity map PDE */ +_zap0done: + + ADDQ $(MACHSTKSZ), SI /* PML4-KZERO */ + MOVQ SI, CR3 /* flush TLB */ + + ADDQ $(2*PTSZ+4*KiB), AX /* PD+PT+vsvm */ + MOVQ 
AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + CALL main(SB) + +TEXT ndnr(SB), 1, $-4 /* no deposit, no return */ +_dnr: + STI + HLT + JMP _dnr /* do not resuscitate */ --- /sys/src/fs/amd64/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/mkfile Tue Aug 27 16:10:44 2013 @@ -0,0 +1,24 @@ +PCCFILES=`{builtin cd ../amd64;echo *.c | sed 's/ /|/g; s/\.c//g'} +PCSFILES=`{builtin cd ../amd64;echo *.s | sed 's/ /|/g; s/\.s//g'} +ARCHCLEAN=l64sipi.out sipi.h + +^($PCCFILES)\.$O:R: '../amd64/\1.c' + $CC $CFLAGS ../amd64/$stem1.c + +^($PCSFILES)\.$O:R: '../amd64/\1.s' + $AS ../amd64/$stem1.s + +$ETHER: ../amd64/etherif.h + +dosfs.$O nvr.$O: ../amd64/dosfs.h + +sipi.h:D: l64sipi.$O + $LD -o l64sipi.out -T0xfffffffff0003000 -R4 -l -s $prereq + {echo 'uchar sipihandler[]={' + xd -1x l64sipi.out | + sed -e 's/^[0-9a-f]+ //' \ + -e '1,2d' -e '3s/^ .. .. .. .. .. .. .. ..//' \ + -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > $target + +sipi.$O: sipi.h --- /sys/src/fs/amd64/fns.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/fns.h Tue Aug 27 16:10:45 2013 @@ -0,0 +1,122 @@ +#include "../port/portfns.h" + +/* junk */ +void waveprint(char*, ...); +void wave(int); +#pragma varargck argpos waveprint 1 + +void vunmap(void*, usize); +#define machcolor(m) -1 +void acpiinit(int); + +int adec(int*); +int ainc(int*); +void apmmuinit(void); +int archmmu(void); +#define BIOSSEG(a) KADDR(((uint)(a))<<4) +void cgaputc(int); +void cgaputs(char*, int); +void cmd_e820(int, char**); +#define coherence() mfence(); +void cpuid(Cpuidreg*); +void cycles(uvlong*); +void etherinit(void); +void etherstart(void); +void fpuinit(void); +void gdtput(int, u64int, u16int); +char* getconf(char*); +u64int getcr0(void); +u64int getcr2(void); +u64int getcr3(void); +u64int getcr4(void); +void halt(void); +void hardhalt(void); +void i8042a20(void); +void i8042reset(void); +void idle(void); +void idthandlers(void); +void idtput(int, u64int); +int inb(int); +u32int inl(int); 
+void insb(int, void*, int); +ushort ins(int); +void insl(int, void*, int); +void inss(int, void*, int); +int islo(void); +#define KADDR(pa) kaddr(pa) +void* kaddr(uintmem); +int kbdgetc(void); +void kbdinit(void); +int kbdintr0(void); +void mfence(void); +void microdelay(int); +void mmuinit(void); +uintmem mmuphysaddr(uintptr); +int mmuwalk(PTE*, uintptr, int, PTE**, uintmem (*)(usize)); +void mpsinit(int); +void ndnr(void); +uchar nvramread(int); +void nvramwrite(int, uchar); +void outb(int, int); +void outl(int, u32int); +void outsb(int, void*, int); +void outs(int, u16int); +void outsl(int, void*, int); +void outss(int, void*, int); +#define PADDR(va) paddr(va) +uintmem paddr(void*); +#define perfticks() rdtsc() +void printcpufreq(void); +void putcr3(u64int); +void putcr4(u64int); +void puttr(u64int); +u64int rdmsr(int); +vlong rdtsc(void); +void sipi(void); +Mpl splhi(void); +Mpl spllo(void); +void splx(Mpl); +int tas32(u32int*); +#define tas(l) tas32((u32int*)l) +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +void trput(u64int); +int uartgetc(void); +void uartputc(int); +void uartputs(char*, int); +void uartspecial(int, void (*)(int), int (*)(void), int); +void* vmappat(uintmem, usize, uint); +int vmapsync(uintptr); +void* vmap(uintmem, usize); +void vsvminit(int); +void wrmsr(int, vlong); + +#define PTR2UINT(p) ((uintptr)(p)) +#define UINT2PTR(i) ((void*)(i)) + +//#define PCIWADDR(a) PADDR(a) + +/* pata */ +void ideinit(Device*); +Devsize idesize(Device*); +int ideread(Device*, Devsize, void*); +int idewrite(Device*, Devsize, void*); +int idesecsize(Device*); + +/* sata */ +void mvinit(Device*); +Devsize mvsize(Device*); +int mvread(Device*, Devsize, void*); +int mvwrite(Device*, Devsize, void*); + +/* aoe */ +void aoeinit(Device*); +Devsize aoesize(Device*); +int aoeread(Device*, Devsize, void*); +int aoewrite(Device*, Devsize, void*); + +/* iasata */ +void iainit(Device*); +Devsize iasize(Device*); +int iaread(Device*, 
Devsize, void*); +int iawrite(Device*, Devsize, void*); --- /sys/src/fs/amd64/l64cpuid.s Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/l64cpuid.s Tue Aug 27 16:10:45 2013 @@ -0,0 +1,13 @@ +TEXT cpuid(SB), 1, $0 + MOVQ RARG, BP + MOVL 0(BP), AX + MOVL 4(BP), BX + MOVL 8(BP), CX + MOVL 12(BP), DX + CPUID + MOVQ RARG, BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET --- /sys/src/fs/amd64/io.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/io.h Tue Aug 27 16:10:46 2013 @@ -0,0 +1,320 @@ +enum { + VectorNMI = 2, /* non-maskable interrupt */ + VectorBPT = 3, /* breakpoint */ + VectorUD = 6, /* invalid opcode exception */ + VectorCNA = 7, /* coprocessor not available */ + Vector2F = 8, /* double fault */ + VectorCSO = 9, /* coprocessor segment overrun */ + VectorPF = 14, /* page fault */ + Vector15 = 15, /* reserved */ + VectorCERR = 16, /* coprocessor error */ + VectorSIMD = 19, /* SIMD error */ + + VectorPIC = 32, /* external i8259 interrupts */ + IrqCLOCK = 0, + IrqKBD = 1, + IrqUART1 = 3, + IrqUART0 = 4, + IrqPCMCIA = 5, + IrqFLOPPY = 6, + IrqLPT = 7, + IrqIRQ7 = 7, + IrqAUX = 12, /* PS/2 port */ + IrqIRQ13 = 13, /* coprocessor on 386 */ + IrqATA0 = 14, + IrqATA1 = 15, + MaxIrqPIC = 15, + + VectorLAPIC = VectorPIC+16, /* local APIC interrupts */ + IrqLINT0 = VectorLAPIC+0, + IrqLINT1 = VectorLAPIC+1, + IrqTIMER = VectorLAPIC+2, + IrqERROR = VectorLAPIC+3, + IrqPCINT = VectorLAPIC+4, + IrqSPURIOUS = VectorLAPIC+15, + MaxIrqLAPIC = VectorLAPIC+15, + + VectorSYSCALL = 64, + + VectorAPIC = 65, /* external APIC interrupts */ + MaxVectorAPIC = 255, +}; + +enum { + IdtPIC = 32, /* external i8259 interrupts */ + + IdtLINT0 = 48, /* local APIC interrupts */ + IdtLINT1 = 49, + IdtTIMER = 50, + IdtERROR = 51, + IdtPCINT = 52, + + IdtIPI = 62, + IdtSPURIOUS = 63, + + IdtSYSCALL = 64, + + IdtIOAPIC = 65, /* external APIC interrupts */ + + IdtMAX = 255, +}; + +typedef struct Vkey Vkey; +typedef struct Vctl Vctl; + +struct Vkey { + int tbdf; /* pci: 
ioapic or msi sources */ + int irq; /* 8259-emulating sources */ +}; + +struct Vctl { + Vctl* next; /* handlers on this vector */ + + int isintr; /* interrupt or fault/trap */ + int affinity; /* processor affinity (-1 for none) */ + + Vkey; /* source-specific key; tbdf for pci */ + void (*f)(Ureg*, void*); /* handler to call */ + void* a; /* argument to call it with */ + char name[NAMELEN]; /* of driver */ + char *type; + int (*isr)(int); /* get isr bit for this irq */ + int (*eoi)(int); /* eoi */ + int (*mask)(Vkey*, int); /* interrupt enable returns masked vector */ + int vno; /* cpu vector */ +}; + +enum{ + Pat = 1<<16, + Mmx = 1<<23, + Sse2 = 1<<26, +}; + +void* intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +int intraffinity(void*); + +#define NVRAUTHADDR 0 + +enum { + MaxEther = 8, +}; + +enum { + BusCBUS = 0, /* Corollary CBUS */ + BusCBUSII, /* Corollary CBUS II */ + BusEISA, /* Extended ISA */ + BusFUTURE, /* IEEE Futurebus */ + BusINTERN, /* Internal bus */ + BusISA, /* Industry Standard Architecture */ + BusMBI, /* Multibus I */ + BusMBII, /* Multibus II */ + BusMCA, /* Micro Channel Architecture */ + BusMPI, /* MPI */ + BusMPSA, /* MPSA */ + BusNUBUS, /* Apple Macintosh NuBus */ + BusPCI, /* Peripheral Component Interconnect */ + BusPCMCIA, /* PC Memory Card International Association */ + BusTC, /* DEC TurboChannel */ + BusVL, /* VESA Local bus */ + BusVME, /* VMEbus */ + BusXPRESS, /* Express System Bus */ +}; + +#define MKBUS(t,b,d,f) (((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8)) +#define BUSFNO(tbdf) (((tbdf)>>8)&0x07) +#define BUSDNO(tbdf) (((tbdf)>>11)&0x1F) +#define BUSBNO(tbdf) (((tbdf)>>16)&0xFF) +#define BUSTYPE(tbdf) ((tbdf)>>24) +#define BUSBDF(tbdf) ((tbdf)&0x00FFFF00) +#define BUSUNKNOWN (-1) + +/* + * PCI support code. 
+ */ +enum { /* type 0 and type 1 pre-defined header */ + PciVID = 0x00, /* vendor ID */ + PciDID = 0x02, /* device ID */ + PciPCR = 0x04, /* command */ + PciPSR = 0x06, /* status */ + PciRID = 0x08, /* revision ID */ + PciCCRp = 0x09, /* programming interface class code */ + PciCCRu = 0x0A, /* sub-class code */ + PciCCRb = 0x0B, /* base class code */ + PciCLS = 0x0C, /* cache line size */ + PciLTR = 0x0D, /* latency timer */ + PciHDT = 0x0E, /* header type */ + PciBST = 0x0F, /* BIST */ + + PciBAR0 = 0x10, /* base address */ + PciBAR1 = 0x14, + + PciINTL = 0x3C, /* interrupt line */ + PciINTP = 0x3D, /* interrupt pin */ +}; + +/* capabilities */ +enum { + PciCapPMG = 0x01, /* power management */ + PciCapAGP = 0x02, + PciCapVPD = 0x03, /* vital product data */ + PciCapSID = 0x04, /* slot id */ + PciCapMSI = 0x05, + PciCapCHS = 0x06, /* compact pci hot swap */ + PciCapPCIX = 0x07, + PciCapHTC = 0x08, /* hypertransport irq conf */ + PciCapVND = 0x09, /* vendor specific information */ + PciCapPCIe = 0x10, + PciCapMSIX = 0x11, + PciCapSATA = 0x12, + PciCapHSW = 0x0c, /* hot swap */ +}; + +/* ccrb (base class code) values; controller types */ +enum { + Pcibcpci1 = 0, /* pci 1.0; no class codes defined */ + Pcibcstore = 1, /* mass storage */ + Pcibcnet = 2, /* network */ + Pcibcdisp = 3, /* display */ + Pcibcmmedia = 4, /* multimedia */ + Pcibcmem = 5, /* memory */ + Pcibcbridge = 6, /* bridge */ + Pcibccomm = 7, /* simple comms (e.g., serial) */ + Pcibcbasesys = 8, /* base system */ + Pcibcinput = 9, /* input */ + Pcibcdock = 0xa, /* docking stations */ + Pcibcproc = 0xb, /* processors */ + Pcibcserial = 0xc, /* serial bus (e.g., USB) */ + Pcibcwireless = 0xd, /* wireless */ + Pcibcintell = 0xe, /* intelligent i/o */ + Pcibcsatcom = 0xf, /* satellite comms */ + Pcibccrypto = 0x10, /* encryption/decryption */ + Pcibcdacq = 0x11, /* data acquisition & signal proc. 
*/ +}; + +/* ccru (sub-class code) values; common cases only */ +enum { + /* mass storage */ + Pciscscsi = 0, /* SCSI */ + Pciscide = 1, /* IDE (ATA) */ + + /* network */ + Pciscether = 0, /* Ethernet */ + + /* display */ + Pciscvga = 0, /* VGA */ + Pciscxga = 1, /* XGA */ + Pcisc3d = 2, /* 3D */ + + /* bridges */ + Pcischostpci = 0, /* host/pci */ + Pciscpcicpci = 1, /* pci/pci */ + + /* simple comms */ + Pciscserial = 0, /* 16450, etc. */ + Pciscmultiser = 1, /* multiport serial */ + + /* serial bus */ + Pciscusb = 3, /* USB */ +}; + +enum { /* type 0 pre-defined header */ + PciBAR2 = 0x18, + PciBAR3 = 0x1C, + PciBAR4 = 0x20, + PciBAR5 = 0x24, + PciCIS = 0x28, /* cardbus CIS pointer */ + PciSVID = 0x2C, /* subsystem vendor ID */ + PciSID = 0x2E, /* cardbus CIS pointer */ + PciEBAR0 = 0x30, /* expansion ROM base address */ + PciMGNT = 0x3E, /* burst period length */ + PciMLT = 0x3F, /* maximum latency between bursts */ +}; + +enum { /* type 1 pre-defined header */ + PciPBN = 0x18, /* primary bus number */ + PciSBN = 0x19, /* secondary bus number */ + PciUBN = 0x1A, /* subordinate bus number */ + PciSLTR = 0x1B, /* secondary latency timer */ + PciIBR = 0x1C, /* I/O base */ + PciILR = 0x1D, /* I/O limit */ + PciSPSR = 0x1E, /* secondary status */ + PciMBR = 0x20, /* memory base */ + PciMLR = 0x22, /* memory limit */ + PciPMBR = 0x24, /* prefetchable memory base */ + PciPMLR = 0x26, /* prefetchable memory limit */ + PciPUBR = 0x28, /* prefetchable base upper 32 bits */ + PciPULR = 0x2C, /* prefetchable limit upper 32 bits */ + PciIUBR = 0x30, /* I/O base upper 16 bits */ + PciIULR = 0x32, /* I/O limit upper 16 bits */ + PciEBAR1 = 0x28, /* expansion ROM base address */ + PciBCR = 0x3E, /* bridge control register */ +}; + +typedef struct Pcidev Pcidev; +typedef struct Pcidev { + int tbdf; /* type+bus+device+function */ + ushort vid; /* vendor ID */ + ushort did; /* device ID */ + + struct { + uintmem bar; /* base address */ + int size; + } mem[6], rom, ioa, mema; + + 
uchar rid; + uchar ccrp; + uchar ccrb; + uchar intl; /* interrupt line */ + uchar ccru; + ushort pcr; + uchar cls; + uchar ltr; + + Pcidev* list; + Pcidev* bridge; /* down a bus */ + Pcidev* link; /* next device on this bno */ +} Pcidev; + +typedef struct Pcisiz Pcisiz; +struct Pcisiz{ + Pcidev* dev; + int siz; + int bar; +}; + +int pcicap(Pcidev*, int); +int pcicfgr8(Pcidev*, int); +int pcicfgr16(Pcidev*, int); +int pcicfgr32(Pcidev*, int); +void pcicfgw8(Pcidev*, int, int); +void pcicfgw16(Pcidev*, int, int); +void pcicfgw32(Pcidev*, int, int); +void pciclrmwi(Pcidev*); +void pcihinv(Pcidev*, uint); +Pcidev* pcimatch(Pcidev*, int, int); +Pcidev* pcimatchtbdf(int); +void pcireset(void); +void pcisetbme(Pcidev*); +void pciclrbme(Pcidev*); + +enum { + Npciopt = 10, + Pcioptlen = 32, +}; + +typedef struct Pciconf Pciconf; +struct Pciconf { + char type[NAMELEN]; + uintmem port; + + int irq; + + int nopt; + char opt[Npciopt][Pcioptlen]; +}; + +extern int pciconfig(char*, int, Pciconf*); +#define PCIWINDOW 0 +#define PCIWADDR(va) (PADDR(va)+PCIWINDOW) +#define Pciwaddrl(va) ((u32int)PCIWADDR(va)) +#define Pciwaddrh(va) ((u32int)(PCIWADDR(va)>>32)) --- /sys/src/fs/amd64/l64idt.s Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/l64idt.s Tue Aug 27 16:10:46 2013 @@ -0,0 +1,341 @@ +/* + * Interrupt/exception handling. 
+ */ +#include "amd64l.h" + +MODE $64 + +TEXT _intrp<>(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* idthandlers(SB) PC */ + JMP _intrcommon + +TEXT _intre<>(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_intrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) /* old CS */ + JEQ _intrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + +// SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_intrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL trap(SB) + +TEXT _intrr<>(SB), 1, $-4 /* so ktrace can pop frame */ + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) + JEQ _iretnested + +// SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), RMACH + MOVQ 0(SP), RUSER + +_iretnested: + ADDQ $40, SP + IRETQ + +TEXT idthandlers(SB), 1, $-4 + CALL _intrp<>(SB); BYTE $IdtDE /* #DE Divide-by-Zero Error */ + CALL _intrp<>(SB); BYTE $IdtDB /* #DB Debug */ + CALL _intrp<>(SB); BYTE $IdtNMI /* #NMI Borked */ + CALL _intrp<>(SB); BYTE $IdtBP /* #BP Breakpoint */ + CALL _intrp<>(SB); BYTE $IdtOF /* #OF Overflow */ + CALL _intrp<>(SB); BYTE $IdtBR /* #BR Bound-Range */ + CALL _intrp<>(SB); BYTE $IdtUD /* #UD Invalid-Opcode */ + CALL _intrp<>(SB); BYTE $IdtNM /* #NM Device-Not-Available */ + CALL _intre<>(SB); BYTE $IdtDF /* #DF Double-Fault */ + CALL _intrp<>(SB); BYTE $Idt09 /* reserved */ + CALL _intre<>(SB); BYTE $IdtTS /* #TS Invalid-TSS */ + CALL _intre<>(SB); BYTE $IdtNP /* #NP Segment-Not-Present */ + CALL _intre<>(SB); BYTE 
$IdtSS /* #SS Stack */ + CALL _intre<>(SB); BYTE $IdtGP /* #GP General-Protection */ + CALL _intre<>(SB); BYTE $IdtPF /* #PF Page-Fault */ + CALL _intrp<>(SB); BYTE $Idt0F /* reserved */ + CALL _intrp<>(SB); BYTE $IdtMF /* #MF x87 FPE-Pending */ + CALL _intre<>(SB); BYTE $IdtAC /* #AC Alignment-Check */ + CALL _intrp<>(SB); BYTE $IdtMC /* #MC Machine-Check */ + CALL _intrp<>(SB); BYTE $IdtXF /* #XF SIMD Floating-Point */ + CALL _intrp<>(SB); BYTE $0x14 /* reserved */ + CALL _intrp<>(SB); BYTE $0x15 /* reserved */ + CALL _intrp<>(SB); BYTE $0x16 /* reserved */ + CALL _intrp<>(SB); BYTE $0x17 /* reserved */ + CALL _intrp<>(SB); BYTE $0x18 /* reserved */ + CALL _intrp<>(SB); BYTE $0x19 /* reserved */ + CALL _intrp<>(SB); BYTE $0x1a /* reserved */ + CALL _intrp<>(SB); BYTE $0x1b /* reserved */ + CALL _intrp<>(SB); BYTE $0x1c /* reserved */ + CALL _intrp<>(SB); BYTE $0x1d /* reserved */ + CALL _intrp<>(SB); BYTE $0x1e /* reserved */ + CALL _intrp<>(SB); BYTE $0x1f /* reserved */ + CALL _intrp<>(SB); BYTE $0x20 + CALL _intrp<>(SB); BYTE $0x21 + CALL _intrp<>(SB); BYTE $0x22 + CALL _intrp<>(SB); BYTE $0x23 + CALL _intrp<>(SB); BYTE $0x24 + CALL _intrp<>(SB); BYTE $0x25 + CALL _intrp<>(SB); BYTE $0x26 + CALL _intrp<>(SB); BYTE $0x27 + CALL _intrp<>(SB); BYTE $0x28 + CALL _intrp<>(SB); BYTE $0x29 + CALL _intrp<>(SB); BYTE $0x2a + CALL _intrp<>(SB); BYTE $0x2b + CALL _intrp<>(SB); BYTE $0x2c + CALL _intrp<>(SB); BYTE $0x2d + CALL _intrp<>(SB); BYTE $0x2e + CALL _intrp<>(SB); BYTE $0x2f + CALL _intrp<>(SB); BYTE $0x30 + CALL _intrp<>(SB); BYTE $0x31 + CALL _intrp<>(SB); BYTE $0x32 + CALL _intrp<>(SB); BYTE $0x33 + CALL _intrp<>(SB); BYTE $0x34 + CALL _intrp<>(SB); BYTE $0x35 + CALL _intrp<>(SB); BYTE $0x36 + CALL _intrp<>(SB); BYTE $0x37 + CALL _intrp<>(SB); BYTE $0x38 + CALL _intrp<>(SB); BYTE $0x39 + CALL _intrp<>(SB); BYTE $0x3a + CALL _intrp<>(SB); BYTE $0x3b + CALL _intrp<>(SB); BYTE $0x3c + CALL _intrp<>(SB); BYTE $0x3d + CALL _intrp<>(SB); BYTE $0x3e + CALL 
_intrp<>(SB); BYTE $0x3f + CALL _intrp<>(SB); BYTE $0x40 + CALL _intrp<>(SB); BYTE $0x41 + CALL _intrp<>(SB); BYTE $0x42 + CALL _intrp<>(SB); BYTE $0x43 + CALL _intrp<>(SB); BYTE $0x44 + CALL _intrp<>(SB); BYTE $0x45 + CALL _intrp<>(SB); BYTE $0x46 + CALL _intrp<>(SB); BYTE $0x47 + CALL _intrp<>(SB); BYTE $0x48 + CALL _intrp<>(SB); BYTE $0x49 + CALL _intrp<>(SB); BYTE $0x4a + CALL _intrp<>(SB); BYTE $0x4b + CALL _intrp<>(SB); BYTE $0x4c + CALL _intrp<>(SB); BYTE $0x4d + CALL _intrp<>(SB); BYTE $0x4e + CALL _intrp<>(SB); BYTE $0x4f + CALL _intrp<>(SB); BYTE $0x50 + CALL _intrp<>(SB); BYTE $0x51 + CALL _intrp<>(SB); BYTE $0x52 + CALL _intrp<>(SB); BYTE $0x53 + CALL _intrp<>(SB); BYTE $0x54 + CALL _intrp<>(SB); BYTE $0x55 + CALL _intrp<>(SB); BYTE $0x56 + CALL _intrp<>(SB); BYTE $0x57 + CALL _intrp<>(SB); BYTE $0x58 + CALL _intrp<>(SB); BYTE $0x59 + CALL _intrp<>(SB); BYTE $0x5a + CALL _intrp<>(SB); BYTE $0x5b + CALL _intrp<>(SB); BYTE $0x5c + CALL _intrp<>(SB); BYTE $0x5d + CALL _intrp<>(SB); BYTE $0x5e + CALL _intrp<>(SB); BYTE $0x5f + CALL _intrp<>(SB); BYTE $0x60 + CALL _intrp<>(SB); BYTE $0x61 + CALL _intrp<>(SB); BYTE $0x62 + CALL _intrp<>(SB); BYTE $0x63 + CALL _intrp<>(SB); BYTE $0x64 + CALL _intrp<>(SB); BYTE $0x65 + CALL _intrp<>(SB); BYTE $0x66 + CALL _intrp<>(SB); BYTE $0x67 + CALL _intrp<>(SB); BYTE $0x68 + CALL _intrp<>(SB); BYTE $0x69 + CALL _intrp<>(SB); BYTE $0x6a + CALL _intrp<>(SB); BYTE $0x6b + CALL _intrp<>(SB); BYTE $0x6c + CALL _intrp<>(SB); BYTE $0x6d + CALL _intrp<>(SB); BYTE $0x6e + CALL _intrp<>(SB); BYTE $0x6f + CALL _intrp<>(SB); BYTE $0x70 + CALL _intrp<>(SB); BYTE $0x71 + CALL _intrp<>(SB); BYTE $0x72 + CALL _intrp<>(SB); BYTE $0x73 + CALL _intrp<>(SB); BYTE $0x74 + CALL _intrp<>(SB); BYTE $0x75 + CALL _intrp<>(SB); BYTE $0x76 + CALL _intrp<>(SB); BYTE $0x77 + CALL _intrp<>(SB); BYTE $0x78 + CALL _intrp<>(SB); BYTE $0x79 + CALL _intrp<>(SB); BYTE $0x7a + CALL _intrp<>(SB); BYTE $0x7b + CALL _intrp<>(SB); BYTE $0x7c + CALL _intrp<>(SB); 
BYTE $0x7d + CALL _intrp<>(SB); BYTE $0x7e + CALL _intrp<>(SB); BYTE $0x7f + CALL _intrp<>(SB); BYTE $0x80 + CALL _intrp<>(SB); BYTE $0x81 + CALL _intrp<>(SB); BYTE $0x82 + CALL _intrp<>(SB); BYTE $0x83 + CALL _intrp<>(SB); BYTE $0x84 + CALL _intrp<>(SB); BYTE $0x85 + CALL _intrp<>(SB); BYTE $0x86 + CALL _intrp<>(SB); BYTE $0x87 + CALL _intrp<>(SB); BYTE $0x88 + CALL _intrp<>(SB); BYTE $0x89 + CALL _intrp<>(SB); BYTE $0x8a + CALL _intrp<>(SB); BYTE $0x8b + CALL _intrp<>(SB); BYTE $0x8c + CALL _intrp<>(SB); BYTE $0x8d + CALL _intrp<>(SB); BYTE $0x8e + CALL _intrp<>(SB); BYTE $0x8f + CALL _intrp<>(SB); BYTE $0x90 + CALL _intrp<>(SB); BYTE $0x91 + CALL _intrp<>(SB); BYTE $0x92 + CALL _intrp<>(SB); BYTE $0x93 + CALL _intrp<>(SB); BYTE $0x94 + CALL _intrp<>(SB); BYTE $0x95 + CALL _intrp<>(SB); BYTE $0x96 + CALL _intrp<>(SB); BYTE $0x97 + CALL _intrp<>(SB); BYTE $0x98 + CALL _intrp<>(SB); BYTE $0x99 + CALL _intrp<>(SB); BYTE $0x9a + CALL _intrp<>(SB); BYTE $0x9b + CALL _intrp<>(SB); BYTE $0x9c + CALL _intrp<>(SB); BYTE $0x9d + CALL _intrp<>(SB); BYTE $0x9e + CALL _intrp<>(SB); BYTE $0x9f + CALL _intrp<>(SB); BYTE $0xa0 + CALL _intrp<>(SB); BYTE $0xa1 + CALL _intrp<>(SB); BYTE $0xa2 + CALL _intrp<>(SB); BYTE $0xa3 + CALL _intrp<>(SB); BYTE $0xa4 + CALL _intrp<>(SB); BYTE $0xa5 + CALL _intrp<>(SB); BYTE $0xa6 + CALL _intrp<>(SB); BYTE $0xa7 + CALL _intrp<>(SB); BYTE $0xa8 + CALL _intrp<>(SB); BYTE $0xa9 + CALL _intrp<>(SB); BYTE $0xaa + CALL _intrp<>(SB); BYTE $0xab + CALL _intrp<>(SB); BYTE $0xac + CALL _intrp<>(SB); BYTE $0xad + CALL _intrp<>(SB); BYTE $0xae + CALL _intrp<>(SB); BYTE $0xaf + CALL _intrp<>(SB); BYTE $0xb0 + CALL _intrp<>(SB); BYTE $0xb1 + CALL _intrp<>(SB); BYTE $0xb2 + CALL _intrp<>(SB); BYTE $0xb3 + CALL _intrp<>(SB); BYTE $0xb4 + CALL _intrp<>(SB); BYTE $0xb5 + CALL _intrp<>(SB); BYTE $0xb6 + CALL _intrp<>(SB); BYTE $0xb7 + CALL _intrp<>(SB); BYTE $0xb8 + CALL _intrp<>(SB); BYTE $0xb9 + CALL _intrp<>(SB); BYTE $0xba + CALL _intrp<>(SB); BYTE $0xbb + 
CALL _intrp<>(SB); BYTE $0xbc + CALL _intrp<>(SB); BYTE $0xbd + CALL _intrp<>(SB); BYTE $0xbe + CALL _intrp<>(SB); BYTE $0xbf + CALL _intrp<>(SB); BYTE $0xc0 + CALL _intrp<>(SB); BYTE $0xc1 + CALL _intrp<>(SB); BYTE $0xc2 + CALL _intrp<>(SB); BYTE $0xc3 + CALL _intrp<>(SB); BYTE $0xc4 + CALL _intrp<>(SB); BYTE $0xc5 + CALL _intrp<>(SB); BYTE $0xc6 + CALL _intrp<>(SB); BYTE $0xc7 + CALL _intrp<>(SB); BYTE $0xc8 + CALL _intrp<>(SB); BYTE $0xc9 + CALL _intrp<>(SB); BYTE $0xca + CALL _intrp<>(SB); BYTE $0xcb + CALL _intrp<>(SB); BYTE $0xcc + CALL _intrp<>(SB); BYTE $0xcd + CALL _intrp<>(SB); BYTE $0xce + CALL _intrp<>(SB); BYTE $0xcf + CALL _intrp<>(SB); BYTE $0xd0 + CALL _intrp<>(SB); BYTE $0xd1 + CALL _intrp<>(SB); BYTE $0xd2 + CALL _intrp<>(SB); BYTE $0xd3 + CALL _intrp<>(SB); BYTE $0xd4 + CALL _intrp<>(SB); BYTE $0xd5 + CALL _intrp<>(SB); BYTE $0xd6 + CALL _intrp<>(SB); BYTE $0xd7 + CALL _intrp<>(SB); BYTE $0xd8 + CALL _intrp<>(SB); BYTE $0xd9 + CALL _intrp<>(SB); BYTE $0xda + CALL _intrp<>(SB); BYTE $0xdb + CALL _intrp<>(SB); BYTE $0xdc + CALL _intrp<>(SB); BYTE $0xdd + CALL _intrp<>(SB); BYTE $0xde + CALL _intrp<>(SB); BYTE $0xdf + CALL _intrp<>(SB); BYTE $0xe0 + CALL _intrp<>(SB); BYTE $0xe1 + CALL _intrp<>(SB); BYTE $0xe2 + CALL _intrp<>(SB); BYTE $0xe3 + CALL _intrp<>(SB); BYTE $0xe4 + CALL _intrp<>(SB); BYTE $0xe5 + CALL _intrp<>(SB); BYTE $0xe6 + CALL _intrp<>(SB); BYTE $0xe7 + CALL _intrp<>(SB); BYTE $0xe8 + CALL _intrp<>(SB); BYTE $0xe9 + CALL _intrp<>(SB); BYTE $0xea + CALL _intrp<>(SB); BYTE $0xeb + CALL _intrp<>(SB); BYTE $0xec + CALL _intrp<>(SB); BYTE $0xed + CALL _intrp<>(SB); BYTE $0xee + CALL _intrp<>(SB); BYTE $0xef + CALL _intrp<>(SB); BYTE $0xf0 + CALL _intrp<>(SB); BYTE $0xf1 + CALL _intrp<>(SB); BYTE $0xf2 + CALL _intrp<>(SB); BYTE $0xf3 + CALL _intrp<>(SB); BYTE $0xf4 + CALL _intrp<>(SB); BYTE $0xf5 + CALL _intrp<>(SB); BYTE $0xf6 + CALL _intrp<>(SB); BYTE $0xf7 + CALL _intrp<>(SB); BYTE $0xf8 + CALL _intrp<>(SB); BYTE $0xf9 + CALL
_intrp<>(SB); BYTE $0xfa + CALL _intrp<>(SB); BYTE $0xfb + CALL _intrp<>(SB); BYTE $0xfc + CALL _intrp<>(SB); BYTE $0xfd + CALL _intrp<>(SB); BYTE $0xfe + CALL _intrp<>(SB); BYTE $0xff --- /sys/src/fs/amd64/mem.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/mem.h Tue Aug 27 16:10:47 2013 @@ -0,0 +1,77 @@ +/* + * Memory and machine-specific definitions. Used in C and assembler. + */ +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#define GiB 1073741824u /* Gibi 0x0000000040000000 */ +#define TiB 1099511627776ull /* Tebi 0x0000010000000000 */ +#define PiB 1125899906842624ull /* Pebi 0x0004000000000000 */ +#define EiB 1152921504606846976ull /* Exbi 0x1000000000000000 */ + +#define ALIGNED(p, a) (!(((uintptr)(p)) & ((a)-1))) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BY2WD 4 +#define BY2V 8 /* bytes per double word */ +#define BY2SE 8 /* bytes per stack element */ +#define BLOCKALIGN 8 + +/* + * 4K pages + */ +#define PGSZ (4*KiB) /* page size */ +#define PGSHIFT 12 /* log(PGSZ) */ +#define PTSZ (4*KiB) /* page table page size */ +#define PTSHIFT 9 /* address bits indexed per page-table level */ + +#define MACHSZ (4*KiB) /* Mach+stack size */ +#define MACHMAX 32 /* max.
number of cpus */ +#define MACHSTKSZ (6*(4*KiB)) /* Mach stack size */ + +#define KSTACK (16*1024) /* Size of Proc kernel stack */ +#define STACKALIGN(sp) ((sp) & ~(BY2SE-1)) /* bug: assure with alloc */ + +/* + * 2M pages + */ +#define BIGPGSHIFT 21 +#define BIGPGSZ (1ull<>(((l)*PTSHIFT)+PGSHIFT)) & ((1<>8) & 0xFF); + cgaregw(0x0F, pos/2 & 0xFF); + BASE[pos+1] = Attr; +} + +static void +cgascreenputc(int c) +{ + int i; + + if(c == '\n'){ + pos = pos/Width; + pos = (pos+1)*Width; + } + else if(c == '\t'){ + i = 4 - ((pos/2)&3); + while(i-->0) + cgascreenputc(' '); + } + else if(c == '\b'){ + if(pos >= 2) + pos -= 2; + cgascreenputc(' '); + pos -= 2; + } + else{ + BASE[pos++] = c; + BASE[pos++] = Attr; + } + if(pos >= Width*Height){ + memmove(BASE, &BASE[Width], Width*(Height-1)); + memset(&BASE[Width*(Height-1)], 0, Width); + pos = Width*(Height-1); + } + movecursor(); +} + +static void +screeninit(void) +{ + lock(&screenlock); + if(screeninitdone == 0){ + pos = cgaregr(0x0E)<<8; + pos |= cgaregr(0x0F); + pos *= 2; + screeninitdone = 1; + } + unlock(&screenlock); +} + +void +cgaputs(char* s, int n) +{ + if(screeninitdone == 0) + screeninit(); + while(n-- > 0) + cgascreenputc(*s++); +} + +void +cgaputc(int c) +{ + if(screeninitdone == 0) + screeninit(); + cgascreenputc(c); +} --- /sys/src/fs/amd64/l64v.s Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/l64v.s Tue Aug 27 16:10:47 2013 @@ -0,0 +1,456 @@ +#include "amd64l.h" + +MODE $64 + +/* + * Port I/O. 
+ */ +TEXT inb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INB + RET + +TEXT insb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSB + RET + +TEXT ins(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INW + RET + +TEXT inss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSW + RET + +TEXT inl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + INL + RET + +TEXT insl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSL + RET + +TEXT outb(SB), 1, $-1 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL byte+8(FP), AX + OUTB + RET + +TEXT outsb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSB + RET + +TEXT outs(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL short+8(FP), AX + OUTW + RET + +TEXT outss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSW + RET + +TEXT outl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL long+8(FP), AX + OUTL + RET + +TEXT outsl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSL + RET + +/* + * Load/store segment descriptor tables: + * GDT - global descriptor table + * IDT - interrupt descriptor table + * TR - task register + * GDTR and LDTR take an m16:m64 argument, + * so shuffle the stack arguments to + * get it in the right format. 
+ */ +TEXT gdtget(SB), 1, $-4 + MOVL GDTR, (RARG) /* Note: 10 bytes returned */ + RET + +TEXT gdtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + + MOVL (RARG), GDTR + + XORQ AX, AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + POPQ AX + MOVWQZX cs+16(FP), BX + PUSHQ BX + PUSHQ AX + RETFQ + +TEXT idtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + MOVL (RARG), IDTR + RET + +TEXT trput(SB), 1, $-4 + MOVW RARG, TASK + RET + +/* + * Read/write various system registers. + */ +TEXT getcr0(SB), 1, $-4 /* Processor Control */ + MOVQ CR0, AX + RET + +TEXT putcr0(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR0 + RET + +TEXT getcr2(SB), 1, $-4 /* #PF Linear Address */ + MOVQ CR2, AX + RET + +TEXT getcr3(SB), 1, $-4 /* PML4 Base */ + MOVQ CR3, AX + RET + +TEXT putcr3(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR3 + RET + +TEXT getcr4(SB), 1, $-4 /* Extensions */ + MOVQ CR4, AX + RET + +TEXT putcr4(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR4 + RET + +TEXT rdtsc(SB), 1, $-4 /* Time Stamp Counter */ + RDTSC + /* u64int rdtsc(void); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT rdmsr(SB), 1, $-4 /* Model-Specific Register */ + MOVL RARG, CX + + RDMSR + /* u64int rdmsr(u32int); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT wrmsr(SB), 1, $-4 + MOVL RARG, CX + MOVL lo+8(FP), AX + MOVL hi+12(FP), DX + + WRMSR + + RET + +TEXT invlpg(SB), 1, $-4 /* INVLPG va+0(FP) */ + MOVQ RARG, va+0(FP) + + INVLPG va+0(FP) + + RET + +TEXT wbinvd(SB), 1, $-4 + WBINVD + RET + +/* + * Serialisation. + */ +TEXT lfence(SB), 1, $-4 + LFENCE + RET + +TEXT mfence(SB), 1, $-4 + MFENCE + RET + +TEXT sfence(SB), 1, $-4 + SFENCE + RET + +/* + * Note: CLI and STI are not serialising instructions. + * Is that assumed anywhere? 
+ */ +TEXT splhi(SB), 1, $-4 +_splhi: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JZ _alreadyhi /* use CMOVLEQ etc. here? */ + + MOVQ (SP), BX + MOVQ BX, 8(RMACH) /* save PC in m->splpc */ + +_alreadyhi: + CLI + RET + +TEXT spllo(SB), 1, $-4 +_spllo: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JNZ _alreadylo /* use CMOVLEQ etc. here? */ + + MOVQ $0, 8(RMACH) /* clear m->splpc */ + +_alreadylo: + STI + RET + +TEXT splx(SB), 1, $-4 + TESTQ $If, RARG /* If - Interrupt Flag */ + JNZ _spllo + JMP _splhi + +TEXT islo(SB), 1, $-4 + PUSHFQ + POPQ AX + ANDQ $If, AX /* If - Interrupt Flag */ + RET + +/* + * Synchronisation + */ +TEXT ainc(SB), 1, $-4 /* int ainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX /* overflow if -ve or 0 */ + JGT _return +_trap: + XORQ BX, BX + MOVQ (BX), BX /* over under sideways down */ +_return: + RET + +TEXT adec(SB), 1, $-4 /* int adec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX /* underflow if -ve */ + JLT _trap + + RET + +/* + * Semaphores rely on negative values for the counter, + * and don't have the same overflow/underflow conditions + * as ainc/adec. + */ +TEXT semainc(SB), 1, $-4 /* int semainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX + RET + +TEXT semadec(SB), 1, $-4 /* int semadec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX + RET + +TEXT tas32(SB), 1, $-4 + MOVL $0xdeaddead, AX + XCHGL AX, (RARG) /* */ + RET + +TEXT fas64(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + +TEXT cas32(SB), 1, $-4 + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +TEXT cas64(SB), 1, $-4 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? 
*/ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +/* + * Label consists of a stack pointer and a programme counter + */ +TEXT gotolabel(SB), 1, $-4 + MOVQ 0(RARG), SP /* restore SP */ + MOVQ 8(RARG), AX /* put return PC on the stack */ + MOVQ AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVQ SP, 0(RARG) /* store SP */ + MOVQ 0(SP), BX /* store return PC */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT hardhalt(SB), 1, $-4 + STI + HLT + RET + +TEXT _monitor(SB), 1, $-4 /* void monitor(void*); */ + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + RET + +TEXT _mwait(SB), 1, $-4 /* void mwait(u32int); */ + MOVLQZX RARG, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + RET + +TEXT k10mwait+0(SB),0,$16 +k10mwloop: + MOVQ RARG, CX + MOVQ val+8(FP), DX + MOVQ (CX), AX + CMPQ AX, DX + JNE k10mwdone + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + MOVQ RARG, CX + MOVQ (CX),AX + MOVQ val+8(FP), DX + CMPQ AX, DX + JNE k10mwdone + XORQ DX, DX + XORQ CX, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + JMP k10mwloop +k10mwdone: + RET , + +TEXT mul64fract(SB), 1, $-4 + MOVQ a+8(FP), AX + MULQ b+16(FP) /* a*b */ + SHRQ $32, AX:DX + MOVQ AX, (RARG) + RET + +#define RDRANDAX BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 +#define RDRAND64AX BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0xf0 + +TEXT rdrand32(SB), $-4 +loop32: + RDRANDAX + JCC loop32 + RET + +TEXT rdrand64(SB), $-4 +loop64: + RDRAND64AX + JCC loop64 + RET + +TEXT rdrandbuf(SB), $0 + MOVQ RARG, DX + + MOVLQZX cnt+8(FP), CX + SHRQ $3, CX +eights: + CMPL CX, $0 + JLE f1 + CALL rdrand64(SB) + MOVQ AX, 0(DX) + ADDQ $8, DX + SUBL $1, CX + JMP 
eights + +f1: + MOVLQZX cnt+8(FP), CX + ANDL $7, CX + SHRQ $2, CX +fours: + CMPL CX, $0 + JLE f2 + CALL rdrand32(SB) + MOVL AX, 0(DX) + ADDQ $4, DX + SUBL $1, CX + JMP fours + +f2: + MOVLQZX cnt+8(FP), CX + ANDL $3, CX +ones: + CMPL CX, $0 + JLE f3 + CALL rdrand32(SB) + MOVB AX, 0(DX) + ADDQ $1, DX + SUBL $1, CX + JMP ones + +f3: + RET --- /sys/src/fs/amd64/arch.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/arch.c Tue Aug 27 16:10:48 2013 @@ -0,0 +1,723 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" +#include "apic.h" + +#define DBG(...) // print(__VA_ARGS__) + +enum { + Intel, + Amd, +}; + +enum { /* cpuid standard function codes */ + Highstdfunc = 0, + Procsig, +}; + +typedef struct Arch Arch; + +struct Arch { + int vendor; + char vstring[12+1]; +}; + +Mconf mconf; + +static Arch aarch; + +void +delay(int ms) +{ + u64int r, t; + + if(ms <= 0) + ms = 1; + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*ms; r < t; r = rdtsc()) + ; +} + +void +microdelay(int µs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*µs; r < t; r = rdtsc()) + ; +} + +static char* +append(char *s, char *e, uint r) +{ + if(s+4 <= e){ + memmove(s, &r, 4); + s += 4; + } + return s; +} + +static char* +brandstring(char *p, char *e) +{ + int i; + Cpuidreg r; + + for(i = 0; i < 3; i++){ + memset(&r, 0, sizeof r); + r.ax = 0x80000002+i; + cpuid(&r); + p = append(p, e, r.ax); + p = append(p, e, r.bx); + p = append(p, e, r.cx); + p = append(p, e, r.dx); + } + if(p == e) + p--; + *p = 0; + return p; +} + +/* use intel brand string to discover hz */ +static vlong +intelbshz(void) +{ + char s[4*4*3+1], *h; + uvlong scale; + + brandstring(s, s+sizeof s); + DBG("brandstring: %s\n", s); + + h = strstr(s, "Hz"); /* 3.07THz */ + if(h == nil || h-s < 5) + return 0; + h[2] = 0; + + scale = 1000; + switch(h[-1]){ + default: + return 0; + case 'T': + scale *= 1000; + case 'G': + scale *= 1000; + case 'M': + scale *= 1000; + } + + /* get rid of the fractional part */ + if(h[-4] == '.'){ + 
h[-4] = h[-5]; + h[-5] = ' '; + scale /= 100; + } + return strtoul(h-5, 0, 0)*scale; +} + +static vlong +cpuidhz(Cpuidreg *regs) +{ + int r; + vlong hz; + u64int msr; + + if(aarch.vendor == Intel){ + switch(regs->ax & 0x0fff3ff0){ + default: + hz = intelbshz(); + break; + } + DBG("cpuidhz: %#llud hz\n", hz); + } + else if(aarch.vendor == Amd){ + switch(regs->ax & 0x0fff0ff0){ + default: + return 0; + case 0x00000f50: /* K8 */ + msr = rdmsr(0xc0010042); + if(msr == 0) + return 0; + hz = (800 + 200*((msr>>1) & 0x1f)) * 1000000ll; + break; + case 0x00100f90: /* K10 */ + case 0x00000620: /* QEMU64 */ + msr = rdmsr(0xc0010064); + r = (msr>>6) & 0x07; + hz = (((msr & 0x3f)+0x10)*100000000ll)/(1<cpuhz = cpuidhz(&r); + m->cpumhz = m->cpuhz / 1000000ull; + if(m->cpuhz == 0) + panic("can't determine clock"); +} + +void +clockinit(void) +{ + char *p, *e; + Cpuidreg r; + + archinit(); + + /* hook to add other stuff */ + memset(&r, 0, sizeof r); + r.ax = Highstdfunc; + cpuid(&r); + e = aarch.vstring + sizeof aarch.vstring; + p = append(aarch.vstring, e, r.bx); + p = append(p, e, r.dx); + p = append(p, e, r.cx); + *p = '\0'; + + if(strcmp(aarch.vstring, "AuthenticAMD") == 0) + aarch.vendor = Amd; + else if(strcmp(aarch.vstring, "GenuineIntel") == 0) + aarch.vendor = Intel; + else + panic("unknown cpu vendor %s", aarch.vstring); +} + +void +printcpufreq(void) +{ + char buf[128], *p, *e; + + p = buf; + e = buf + sizeof buf; + p = seprint(p, e, "cpu%d: %dMHz ", 0, m->cpumhz); + p = seprint(p, e, "%s ", aarch.vstring); + p = brandstring(p, e); + seprint(p, e, "\n"); + print(buf); + print("\n"); +} + +void +cpuidentify(void) +{ + wrmsr(0x10, 0); /* reset tsc */ + + vsvminit(MACHSTKSZ); + archinit(); + printcpufreq(); +} + +/* + * Where configuration info is left for the loaded programme. + * This will turn into a structure as more is done by the boot loader + * (e.g. why parse the .ini file twice?). + * There are 1024 bytes available at CONFADDR. 
+ */ +#define CONFADDR ((char*)KADDR(0x1200)) /* info passed from boot loader */ +#define BOOTLINE CONFADDR +#define BOOTLINELEN 64 +#define BOOTARGS ((CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (1024-BOOTLINELEN) +#define MAXCONF 32 + +char bootdisk[NAMELEN]; +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; + +int +getcfields(char* lp, char** fields, int n, char* sep) +{ + int i; + + for(i = 0; lp && *lp && i < n; i++){ + while(*lp && strchr(sep, *lp) != 0) + *lp++ = 0; + if(*lp == 0) + break; + fields[i] = lp; + while(*lp && strchr(sep, *lp) == 0){ + if(*lp == '\\' && *(lp+1) == '\n') + *lp++ = ' '; + lp++; + } + } + + return i; +} + +static void +options(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. + */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getcfields(cp, line, MAXCONF, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == 0) + continue; + *cp++ = 0; + if(cp - line[i] >= NAMELEN+1) + *(line[i]+NAMELEN-1) = 0; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +typedef struct { + u64int base; + u64int lim; + u32int type; +}Emap; + +static char *etypes[] = +{ + "type=0", + "memory", + "reserved", + "acpi reclaim", + "acpi nvs", + "unusable", + "disable", +}; + +#define maxe820 32 + +/* debugging crap */ +uint ne820; +static Emap emap[maxe820]; + +void +cmd_e820(int, char **) +{ + uint n; + Emap *e, *end; + vlong sz, ex; + + print("found %ud e820 entries %ud banks\n", ne820, mconf.nbank); + + e = emap; + end = e+ne820; + + n = 0; + sz = 0; + ex = 0; + for(; ebase, e->lim); + if(e->type < nelem(etypes)) + print("%s\n", etypes[e->type]); + else + print("type=%ud\n", e->type); + + if(e->type != 
1 || e->base == 0) + continue; + sz += e->lim - e->base; + if(++n == MAXBANK) + continue; + + print("\t" "bank %llux %llux\n", e->base, e->lim); + } + print("found %d e820 memory banks %lludMB+%lludMB\n", n, sz/MiB, ex/MiB); +} + +static void +e820(void) +{ + char *s, *f[32*3]; + uint i, n, bank; + Emap *e; + + s = getconf("*e820"); + if(s == nil) + panic("*e820 scan fails"); + n = getfields(s, f, nelem(f), 0, " "); + n -= n%3; + + bank = 0; + for(i = 0; i < n; i += 3){ + e = emap + ne820; + e->type = strtoull(f[i+0], 0, 0); + e->base = strtoull(f[i+1], 0, 0); + e->lim = strtoull(f[i+2], 0, 0); + ne820++; + bank |= e->type == 1 && e->base != 0; + } + if(bank == 0) + panic("*e820 scan fails"); + cmd_install("e820", "-- print e820 scan results", cmd_e820); +} + +static void +addbank(Emap *e) +{ + uintmem base; + Mbank *b; + + if(mconf.nbank == nelem(mconf.bank)){ + print("addbank: lost chunk %#P:%#P; increase MAXBANK\n", e->base, e->lim); + return; + } + base = e->base; + if(mconf.nbank == 0){ + if(e->lim < INIMAP) + panic("need at least %d bytes memory", INIMAP); + b = mconf.bank + mconf.nbank; + b->base = base + PADDR(end); + b->base = ROUNDUP(b->base, BY2PG); + b->limit = INIMAP; + mconf.nbank++; + base = INIMAP; + } + b = mconf.bank + mconf.nbank; + b->base = base; + b->limit = e->lim; + mconf.nbank++; +} + +void +bankinit(void) +{ + uint i; + Emap *e; + + for(i = 0; i < ne820; i++){ + e = emap + i; + if(e->type != 1 || e->base == 0) + continue; + addbank(e); + } +} + +extern void cmd_vec(int, char**); +extern void cmd_machvec(int, char**); + +void +vecinit(void) +{ + options(); + e820(); + bankinit(); + mmuinit(); + pcireset(); + trapinit(); + acpiinit(MACHMAX); +#ifdef MPS + mpsinit(MACHMAX); /* remove this */ +#endif + lapiconline(); + ioapiconline(); + sipi(); + fpuinit(); + + cmd_install("vec", "-- vectors", cmd_vec); /* move to trap.c */ + cmd_install("machvec", "-- vectors cnt", cmd_machvec); /* move to trap.c */ +} + +char* +getconf(char *name) +{ + int 
i; + + for(i = 0; i < nconf; i++) + if(cistrcmp(confname[i], name) == 0) + return confval[i]; + return 0; +} + +void +lockinit(void) +{ +} + +void +idle(void) +{ + hardhalt(); +} + +void +launchinit(void) +{ +} + +void +lights(int, int) +{ +} + +Float +famd(Float a, int b, int c, int d) +{ + return ((a+b) * c) / d; +} + +ulong +fdf(Float a, int b) +{ + return a / b; +} + +uintmem +meminit(void) +{ + uint i; + uintmem sz; + + sz = 0; + for(i = 0; i < mconf.nbank; i++) + sz += mconf.bank[i].limit-mconf.bank[i].base; + return sz; +} + +static void +addmachpgsz(int bits) +{ + int i; + + i = m->npgsz; + m->pgszlg2[i] = bits; + m->pgszmask[i] = (1<<bits)-1; + m->pgsz[i] = 1<<bits; + m->npgsz++; +} + +int +archmmu(void) +{ + Cpuidreg r; + + addmachpgsz(12); + addmachpgsz(21); + + /* + * Check the Page1GB bit (bit 26) in function 0x80000001 DX for 1*GiB support. + */ + r = (Cpuidreg){0x80000001, 0, 0, 0}; + cpuid(&r); + if(r.dx & 0x04000000) /* DX, not CX: Page1GB is reported in DX */ + addmachpgsz(30); + + return m->npgsz; +} + +void +userinit(void (*f)(void), void *arg, char *text) +{ + User *p; + + p = newproc(); + + /* + * Kernel Stack. + * The - sizeof(uintptr) is because the path sched()->gotolabel()->init0()->f() + * uses a stack location without creating any local space. + */ + p->sched.pc = (uintptr)init0; + p->sched.sp = (uintptr)p->stack + sizeof(p->stack) - sizeof(uintptr); + p->start = f; + p->text = text; + p->arg = arg; + + dofilter(&p->time); + ready(p); +} + +int +consgetc(void) +{ + int c; + + if(c = kbdgetc()) + return c; + if(c = cecgetc()) + return c; + return uartgetc(); +} + +void +consputs(char* s, int n) +{ + cgaputs(s, n); + cecputs(s, n); + uartputs(s, n); +} + +/* remove this crunchy junk */ +void +wave(int c) +{ + outb(0x3F8+0, c); + while((inb(0x3F8+5) & 1<<5) == 0) + ; +} + +void +waveprint(char *fmt, ...)
+{ + int i; + static Lock lk; + + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + vseprint(buf, buf+sizeof buf, fmt, arg); + va_end(arg); + + ilock(&lk); + for(i = 0; buf[i] != 0; i++){ + wave(buf[i]); + cgaputc(buf[i]); + microdelay(100); + } + iunlock(&lk); +} + +void +consinit(void) +{ + char *p; + int baud, port; + + kbdinit(); + + if((p = getconf("console")) == 0 || cistrcmp(p, "cga") == 0) + return; + + port = strtoul(p, &p, 0); + if(port < 0 || port > 1) + return; + while(*p == ' ' || *p == '\t') + p++; + if(*p != 'b' || (baud = strtoul(p+1, 0, 0)) == 0) + baud = 9600; + + uartspecial(port, kbdchar, nil, baud); + conf.useuart = 1; +} + +void +consreset(void) +{ +} + +int +pciconfig(char *class, int ctlrno, Pciconf *pci) +{ + char cc[NAMELEN], *p, *q, *r; + int n; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + for(n = 0; n < nconf; n++){ + if(cistrncmp(confname[n], cc, NAMELEN)) + continue; + pci->nopt = 0; + p = confval[n]; + while(*p){ + while(*p == ' ' || *p == '\t') + p++; + if(*p == '\0') + break; + if(cistrncmp(p, "type=", 5) == 0){ + p += 5; + for(q = pci->type; q < &pci->type[NAMELEN-1]; q++){ + if(*p == '\0' || *p == ' ' || *p == '\t') + break; + *q = *p++; + } + *q = '\0'; + } + else if(cistrncmp(p, "port=", 5) == 0) + pci->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + pci->irq = strtoul(p+4, &p, 0); + else if(pci->nopt < Npciopt){ + r = pci->opt[pci->nopt]; + while(*p && *p != ' ' && *p != '\t'){ + *r++ = *p++; + if(r-pci->opt[pci->nopt] >= Pcioptlen-1) + break; + } + *r = '\0'; + pci->nopt++; + } + while(*p && *p != ' ' && *p != '\t') + p++; + } + return 1; + } + return 0; +} + +void +cycles(uvlong *t) +{ + *t = rdtsc(); +} + +void +firmware(void) +{ + char *p; + + /* + * Always called splhi(). 
+ */ + if((p = getconf("reset")) && cistrcmp(p, "manual") == 0){ + predawn = 1; + print("\nHit Reset\n"); + for(;;); + } + pcireset(); + i8042reset(); +} + +void +clockreload(Timet) +{ +} + +void +archmach0init(Mach *m) +{ + m->stack = PTR2UINT(sys->machstk); + m->vsvm = sys->vsvmpage; + + m->online = 1; +} --- /sys/src/fs/amd64/amd64.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/amd64.h Tue Aug 27 16:10:48 2013 @@ -0,0 +1,209 @@ +enum { /* Cr0 */ + Pe = 0x00000001, /* Protected Mode Enable */ + Mp = 0x00000002, /* Monitor Coprocessor */ + Em = 0x00000004, /* Emulate Coprocessor */ + Ts = 0x00000008, /* Task Switched */ + Et = 0x00000010, /* Extension Type */ + Ne = 0x00000020, /* Numeric Error */ + Wp = 0x00010000, /* Write Protect */ + Am = 0x00040000, /* Alignment Mask */ + Nw = 0x20000000, /* Not Writethrough */ + Cd = 0x40000000, /* Cache Disable */ + Pg = 0x80000000, /* Paging Enable */ +}; + +enum { /* Cr3 */ + Pwt = 0x00000008, /* Page-Level Writethrough */ + Pcd = 0x00000010, /* Page-Level Cache Disable */ +}; + +enum { /* Cr4 */ + Vme = 0x00000001, /* Virtual-8086 Mode Extensions */ + Pvi = 0x00000002, /* Protected Mode Virtual Interrupts */ + Tsd = 0x00000004, /* Time-Stamp Disable */ + De = 0x00000008, /* Debugging Extensions */ + Pse = 0x00000010, /* Page-Size Extensions */ + Pae = 0x00000020, /* Physical Address Extension */ + Mce = 0x00000040, /* Machine Check Enable */ + Pge = 0x00000080, /* Page-Global Enable */ + Pce = 0x00000100, /* Performance Monitoring Counter Enable */ + Osfxsr = 0x00000200, /* FXSAVE/FXRSTOR Support */ + Osxmmexcpt = 0x00000400, /* Unmasked Exception Support */ +}; + +enum { /* cpuid fn 1 dx */ + Pat = 1<<16, /* page attribute table (memory type control) */ +}; + +enum { /* Rflags */ + Cf = 0x00000001, /* Carry Flag */ + Pf = 0x00000004, /* Parity Flag */ + Af = 0x00000010, /* Auxiliary Flag */ + Zf = 0x00000040, /* Zero Flag */ + Sf = 0x00000080, /* Sign Flag */ + Tf = 0x00000100, /* Trap Flag */ + If = 0x00000200, /*
Interrupt Flag */ + Df = 0x00000400, /* Direction Flag */ + Of = 0x00000800, /* Overflow Flag */ + Iopl0 = 0x00000000, /* I/O Privilege Level */ + Iopl1 = 0x00001000, + Iopl2 = 0x00002000, + Iopl3 = 0x00003000, + Nt = 0x00004000, /* Nested Task */ + Rf = 0x00010000, /* Resume Flag */ + Vm = 0x00020000, /* Virtual-8086 Mode */ + Ac = 0x00040000, /* Alignment Check */ + Vif = 0x00080000, /* Virtual Interrupt Flag */ + Vip = 0x00100000, /* Virtual Interrupt Pending */ + Id = 0x00200000, /* ID Flag */ +}; + +enum { /* MSRs */ + PerfEvtbase = 0xc0010000, /* Performance Event Select */ + PerfCtrbase = 0xc0010004, /* Performance Counters */ + + Efer = 0xc0000080, /* Extended Feature Enable */ + Star = 0xc0000081, /* Legacy Target IP and [CS]S */ + Lstar = 0xc0000082, /* Long Mode Target IP */ + Cstar = 0xc0000083, /* Compatibility Target IP */ + Sfmask = 0xc0000084, /* SYSCALL Flags Mask */ + FSbase = 0xc0000100, /* 64-bit FS Base Address */ + GSbase = 0xc0000101, /* 64-bit GS Base Address */ + KernelGSbase = 0xc0000102, /* SWAPGS instruction */ +}; + +enum { /* Efer */ + Sce = 0x00000001, /* System Call Extension */ + Lme = 0x00000100, /* Long Mode Enable */ + Lma = 0x00000400, /* Long Mode Active */ + Nxe = 0x00000800, /* No-Execute Enable */ + Svme = 0x00001000, /* SVM Extension Enable */ + Ffxsr = 0x00004000, /* Fast FXSAVE/FXRSTOR */ +}; + +enum { /* PML4E/PDPE/PDE/PTE */ + PteP = 0x0000000000000001ull,/* Present */ + PteRW = 0x0000000000000002ull,/* Read/Write */ + PteU = 0x0000000000000004ull,/* User/Supervisor */ + PtePWT = 0x0000000000000008ull,/* Page-Level Write Through */ + PtePCD = 0x0000000000000010ull,/* Page Level Cache Disable */ + PteA = 0x0000000000000020ull,/* Accessed */ + PteD = 0x0000000000000040ull,/* Dirty */ + PtePS = 0x0000000000000080ull,/* Page Size */ + Pte4KPAT = PtePS, /* PTE PAT */ + PteG = 0x0000000000000100ull,/* Global */ + Pte2MPAT = 0x0000000000001000ull,/* PDE PAT */ + Pte1GPAT = Pte2MPAT, /* PDPE PAT */ + PteNX = 
0x8000000000000000ull,/* No Execute */ +}; + +enum { /* PAT memory type encodings */ + PATUC = 0, /* strong uncacheable */ + PATWC = 1, /* use write-combining buffers */ + PATWT = 4, /* write-through */ + PATWP = 5, /* write protect */ + PATWB = 6, /* write back */ + PATUCMINUS = 7, /* UC-; uncacheable unless an MTRR selects write-combining */ +}; + +enum { /* Exceptions */ + IdtDE = 0, /* Divide-by-Zero Error */ + IdtDB = 1, /* Debug */ + IdtNMI = 2, /* Non-Maskable-Interrupt */ + IdtBP = 3, /* Breakpoint */ + IdtOF = 4, /* Overflow */ + IdtBR = 5, /* Bound-Range */ + IdtUD = 6, /* Invalid-Opcode */ + IdtNM = 7, /* Device-Not-Available */ + IdtDF = 8, /* Double-Fault */ + Idt09 = 9, /* unsupported */ + IdtTS = 10, /* Invalid-TSS */ + IdtNP = 11, /* Segment-Not-Present */ + IdtSS = 12, /* Stack */ + IdtGP = 13, /* General-Protection */ + IdtPF = 14, /* Page-Fault */ + Idt0F = 15, /* reserved */ + IdtMF = 16, /* x87 FPE-Pending */ + IdtAC = 17, /* Alignment-Check */ + IdtMC = 18, /* Machine-Check */ + IdtXF = 19, /* SIMD Floating-Point */ +}; + +/* + * Vestigial Segmented Virtual Memory. 
+ */ +enum { /* Segment Descriptor */ + SdISTM = 0x0000000700000000ull,/* Interrupt Stack Table Mask */ + SdA = 0x0000010000000000ull,/* Accessed */ + SdR = 0x0000020000000000ull,/* Readable (Code) */ + SdW = 0x0000020000000000ull,/* Writeable (Data) */ + SdE = 0x0000040000000000ull,/* Expand Down */ + SdaTSS = 0x0000090000000000ull,/* Available TSS */ + SdbTSS = 0x00000b0000000000ull,/* Busy TSS */ + SdCG = 0x00000c0000000000ull,/* Call Gate */ + SdIG = 0x00000e0000000000ull,/* Interrupt Gate */ + SdTG = 0x00000f0000000000ull,/* Trap Gate */ + SdCODE = 0x0000080000000000ull,/* Code/Data */ + SdS = 0x0000100000000000ull,/* System/User */ + SdDPL0 = 0x0000000000000000ull,/* Descriptor Privilege Level */ + SdDPL1 = 0x0000200000000000ull, + SdDPL2 = 0x0000400000000000ull, + SdDPL3 = 0x0000600000000000ull, + SdP = 0x0000800000000000ull,/* Present */ + Sd4G = 0x000f00000000ffffull,/* 4G Limit */ + SdL = 0x0020000000000000ull,/* Long Attribute */ + SdD = 0x0040000000000000ull,/* Default Operand Size */ + SdG = 0x0080000000000000ull,/* Granularity */ +}; + +/* + * Performance Counter Configuration + */ +enum { /* Performance Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +enum { /* Segment Selector */ + SsRPL0 = 0x0000, /* Requestor Privilege Level */ + SsRPL1 = 0x0001, + SsRPL2 = 0x0002, + 
SsRPL3 = 0x0003, + SsTIGDT = 0x0000, /* GDT Table Indicator */ + SsTILDT = 0x0004, /* LDT Table Indicator */ + SsSIM = 0xfff8, /* Selector Index Mask */ +}; + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +enum { + SiNULL = 0, /* NULL selector index */ + SiCS = 1, /* CS selector index */ + SiDS = 2, /* DS selector index */ + SiU32CS = 3, /* User CS selector index */ + SiUDS = 4, /* User DS selector index */ + SiUCS = 5, /* User CS selector index */ + SiFS = 6, /* FS selector index */ + SiGS = 7, /* GS selector index */ + SiTSS = 8, /* TSS selector index */ +}; + +/* + * Extern registers. + */ +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ --- /sys/src/fs/amd64/amd64l.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/amd64l.h Tue Aug 27 16:10:49 2013 @@ -0,0 +1,162 @@ +#define Pe 0x00000001 /* Protected Mode Enable */ +#define Mp 0x00000002 /* Monitor Coprocessor */ +#define Em 0x00000004 /* Emulate Coprocessor */ +#define Ts 0x00000008 /* Task Switched */ +#define Et 0x00000010 /* Extension Type */ +#define Ne 0x00000020 /* Numeric Error */ +#define Wp 0x00010000 /* Write Protect */ +#define Am 0x00040000 /* Alignment Mask */ +#define Nw 0x20000000 /* Not Writethrough */ +#define Cd 0x40000000 /* Cache Disable */ +#define Pg 0x80000000 /* Paging Enable */ + +#define Pwt 0x00000008 /* Page-Level Writethrough */ +#define Pcd 0x00000010 /* Page-Level Cache Disable */ + +#define Vme 0x00000001 /* Virtual-8086 Mode Extensions */ +#define Pvi 0x00000002 /* Protected Mode Virtual Interrupts */ +#define Tsd 0x00000004 /* Time-Stamp Disable */ +#define De 0x00000008 /* Debugging Extensions */ +#define Pse 0x00000010 /* Page-Size Extensions */ +#define Pae 0x00000020 /* Physical Address Extension */ +#define Mce 0x00000040 /* Machine Check Enable */ +#define Pge 0x00000080 /* Page-Global Enable */ +#define Pce 0x00000100 /* Performance Monitoring Counter Enable */ +#define Osfxsr 0x00000200 /* FXSAVE/FXRSTOR Support */ +#define Osxmmexcpt 
0x00000400 /* Unmasked Exception Support */ + +#define Cf 0x00000001 /* Carry Flag */ +#define Pf 0x00000004 /* Parity Flag */ +#define Af 0x00000010 /* Auxiliary Flag */ +#define Zf 0x00000040 /* Zero Flag */ +#define Sf 0x00000080 /* Sign Flag */ +#define Tf 0x00000100 /* Trap Flag */ +#define If 0x00000200 /* Interrupt Flag */ +#define Df 0x00000400 /* Direction Flag */ +#define Of 0x00000800 /* Overflow Flag */ +#define Iopl0 0x00000000 /* I/O Privilege Level */ +#define Iopl1 0x00001000 +#define Iopl2 0x00002000 +#define Iopl3 0x00003000 +#define Nt 0x00004000 /* Nested Task */ +#define Rf 0x00010000 /* Resume Flag */ +#define Vm 0x00020000 /* Virtual-8086 Mode */ +#define Ac 0x00040000 /* Alignment Check */ +#define Vif 0x00080000 /* Virtual Interrupt Flag */ +#define Vip 0x00100000 /* Virtual Interrupt Pending */ +#define Id 0x00200000 /* ID Flag */ + +#define PerfEvtbase 0xc0010000 /* Performance Event Select */ +#define PerfCtrbase 0xc0010004 /* Performance Counters */ + +#define Efer 0xc0000080 /* Extended Feature Enable */ +#define Star 0xc0000081 /* Legacy Target IP and [CS]S */ +#define Lstar 0xc0000082 /* Long Mode Target IP */ +#define Cstar 0xc0000083 /* Compatibility Target IP */ +#define Sfmask 0xc0000084 /* SYSCALL Flags Mask */ +#define FSbase 0xc0000100 /* 64-bit FS Base Address */ +#define GSbase 0xc0000101 /* 64-bit GS Base Address */ +#define KernelGSbase 0xc0000102 /* SWAPGS instruction */ + +#define Sce 0x00000001 /* System Call Extension */ +#define Lme 0x00000100 /* Long Mode Enable */ +#define Lma 0x00000400 /* Long Mode Active */ +#define Nxe 0x00000800 /* No-Execute Enable */ +#define Svme 0x00001000 /* SVM Extension Enable */ +#define Ffxsr 0x00004000 /* Fast FXSAVE/FXRSTOR */ + +#define PteP 0x0000000000000001ull /* Present */ +#define PteRW 0x0000000000000002ull /* Read/Write */ +#define PteU 0x0000000000000004ull /* User/Supervisor */ +#define PtePWT 0x0000000000000008ull /* Page-Level Write Through */ +#define PtePCD 
0x0000000000000010ull /* Page Level Cache Disable */ +#define PteA 0x0000000000000020ull /* Accessed */ +#define PteD 0x0000000000000040ull /* Dirty */ +#define PtePS 0x0000000000000080ull /* Page Size */ +#define Pte4KPAT PtePS /* PTE PAT */ +#define PteG 0x0000000000000100ull /* Global */ +#define Pte2MPAT 0x0000000000001000ull /* PDE PAT */ +#define Pte1GPAT Pte2MPAT /* PDPE PAT */ +#define PteNX 0x8000000000000000ull /* No Execute */ + +#define IdtDE 0 /* Divide-by-Zero Error */ +#define IdtDB 1 /* Debug */ +#define IdtNMI 2 /* Non-Maskable-Interrupt */ +#define IdtBP 3 /* Breakpoint */ +#define IdtOF 4 /* Overflow */ +#define IdtBR 5 /* Bound-Range */ +#define IdtUD 6 /* Invalid-Opcode */ +#define IdtNM 7 /* Device-Not-Available */ +#define IdtDF 8 /* Double-Fault */ +#define Idt09 9 /* unsupported */ +#define IdtTS 10 /* Invalid-TSS */ +#define IdtNP 11 /* Segment-Not-Present */ +#define IdtSS 12 /* Stack */ +#define IdtGP 13 /* General-Protection */ +#define IdtPF 14 /* Page-Fault */ +#define Idt0F 15 /* reserved */ +#define IdtMF 16 /* x87 FPE-Pending */ +#define IdtAC 17 /* Alignment-Check */ +#define IdtMC 18 /* Machine-Check */ +#define IdtXF 19 /* SIMD Floating-Point */ + +#define SdISTM 0x0000000700000000ull /* Interrupt Stack Table Mask */ +#define SdA 0x0000010000000000ull /* Accessed */ +#define SdR 0x0000020000000000ull /* Readable (Code) */ +#define SdW 0x0000020000000000ull /* Writeable (Data) */ +#define SdE 0x0000040000000000ull /* Expand Down */ +#define SdaTSS 0x0000090000000000ull /* Available TSS */ +#define SdbTSS 0x00000b0000000000ull /* Busy TSS */ +#define SdCG 0x00000c0000000000ull /* Call Gate */ +#define SdIG 0x00000e0000000000ull /* Interrupt Gate */ +#define SdTG 0x00000f0000000000ull /* Trap Gate */ +#define SdCODE 0x0000080000000000ull /* Code/Data */ +#define SdS 0x0000100000000000ull /* System/User */ +#define SdDPL0 0x0000000000000000ull /* Descriptor Privilege Level */ +#define SdDPL1 0x0000200000000000ull +#define SdDPL2 
0x0000400000000000ull +#define SdDPL3 0x0000600000000000ull +#define SdP 0x0000800000000000ull /* Present */ +#define Sd4G 0x000f00000000ffffull /* 4G Limit */ +#define SdL 0x0020000000000000ull /* Long Attribute */ +#define SdD 0x0040000000000000ull /* Default Operand Size */ +#define SdG 0x0080000000000000ull /* Granularity */ + +#define PeHo 0x0000020000000000ull /* Host only */ +#define PeGo 0x0000010000000000ull /* Guest only */ +#define PeEvMskH 0x0000000f00000000ull /* Event mask H */ +#define PeCtMsk 0x00000000ff000000ull /* Counter mask */ +#define PeInMsk 0x0000000000800000ull /* Invert mask */ +#define PeCtEna 0x0000000000400000ull /* Counter enable */ +#define PeInEna 0x0000000000100000ull /* Interrupt enable */ +#define PePnCtl 0x0000000000080000ull /* Pin control */ +#define PeEdg 0x0000000000040000ull /* Edge detect */ +#define PeOS 0x0000000000020000ull /* OS mode */ +#define PeUsr 0x0000000000010000ull /* User mode */ +#define PeUnMsk 0x000000000000ff00ull /* Unit Mask */ +#define PeEvMskL 0x00000000000000ffull /* Event Mask L */ + +#define PeEvMsksh 32ull /* Event mask shift */ + +#define SsRPL0 0x0000 /* Requestor Privilege Level */ +#define SsRPL1 0x0001 +#define SsRPL2 0x0002 +#define SsRPL3 0x0003 +#define SsTIGDT 0x0000 /* GDT Table Indicator */ +#define SsTILDT 0x0004 /* LDT Table Indicator */ +#define SsSIM 0xfff8 /* Selector Index Mask */ + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +#define SiNULL 0 /* NULL selector index */ +#define SiCS 1 /* CS selector index */ +#define SiDS 2 /* DS selector index */ +#define SiU32CS 3 /* User CS selector index */ +#define SiUDS 4 /* User DS selector index */ +#define SiUCS 5 /* User CS selector index */ +#define SiFS 6 /* FS selector index */ +#define SiGS 7 /* GS selector index */ +#define SiTSS 8 /* TSS selector index */ + +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ --- /sys/src/fs/amd64/dosfs.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/dosfs.c Tue Aug 
27 16:10:50 2013 @@ -0,0 +1,843 @@ +#include "all.h" +#include "io.h" + +#include "dosfs.h" + +#define chat(...) //print(__VA_ARGS__) + +/* + * block io buffers + */ +typedef struct Clustbuf Clustbuf; + +struct Clustbuf +{ + int flags; + int age; + Devsize sector; + uchar * iobuf; + Dos * dos; + int size; + int bufsize; +}; + +enum +{ + Nbio= 16, + LOCKED= 1, + MOD= 2, + IMMED= 4, +}; + +static void puttime(Dosdir*); + +static Clustbuf bio[Nbio]; + +/* + * write an io buffer and update its flags + */ +static void +writeclust(Clustbuf *p) +{ + Dos *dos; + Off addr; + + dos = p->dos; + addr = (p->sector+dos->start)*dos->sectbytes; + chat("writeclust @ %lld addr %lld...", (Wideoff)p->sector, + (Wideoff)addr); + if(dos->write(dos->dev, p->iobuf, p->size, addr) != p->size) + panic("writeclust: write"); + p->flags &= ~(MOD|IMMED); + chat("OK\n"); +} + +/* + * write any dirty buffers + */ +static void +syncclust(void) +{ + Clustbuf *p; + + for(p = bio; p < &bio[Nbio]; p++){ + if(p->flags & LOCKED) + panic("syncclust"); + if(p->flags & MOD) + writeclust(p); + } +} + +/* + * get an io buffer, possibly with valid data + */ +static Clustbuf* +getclust0(Dos *dos, Off sector) +{ + Clustbuf *p, *oldest; + + chat("getclust0 @ %lld\n", (Wideoff)sector); + + /* + * if we have it, just return it + * otherwise, reuse the oldest unlocked entry + */ + oldest = 0; + for(p = bio; p < &bio[Nbio]; p++){ + if(sector == p->sector && dos == p->dos){ + if(p->flags & LOCKED) + panic("getclust0 locked"); + chat("getclust0 %lld in cache\n", (Wideoff)sector); + p->flags |= LOCKED; + return p; + } + if(p->flags & LOCKED) + continue; + if(oldest == 0 || p->age <= oldest->age) + oldest = p; + } + p = oldest; + if(p == 0) + panic("getclust0 all locked"); + p->flags |= LOCKED; + if(p->flags & MOD) + writeclust(p); + + /* + * make sure the buffer is big enough + */ + if(p->iobuf==0 || p->bufsize < dos->clustbytes){ + p->bufsize = dos->clustbytes; + p->iobuf = ialloc(p->bufsize, 0); + } + if(sector >= 
dos->dataaddr) + p->size = dos->clustbytes; + else + p->size = dos->sectbytes; + p->dos = 0; /* make it invalid */ + return p; +} + +/* + * get an io block from an io buffer, reading it from the + * device unless getclust0 found it cached. returns 0 if the + * read fails; the buffer is then unlocked again (and stays + * invalid) because the caller has nothing to pass to putclust. + */ +static Clustbuf* +getclust(Dos *dos, Off sector) +{ + Clustbuf *p; + Off addr; + + p = getclust0(dos, sector); + if(p->dos){ + p->age = Ticks; + return p; + } + addr = (sector+dos->start)*dos->sectbytes; + chat("getclust read addr %lld\n", (Wideoff)addr); + if(dos->read(dos->dev, p->iobuf, p->size, addr) != p->size){ + chat("can't read block\n"); + p->flags &= ~LOCKED; /* else syncclust panics later */ + return 0; + } + + p->age = Ticks; + p->dos = dos; + p->sector = sector; + chat("getclust %lld read\n", (Wideoff)sector); + return p; +} + +/* + * get an io block from an io buffer; + * any current data is discarded. + */ +static Clustbuf* +getclustz(Dos *dos, Off sector) +{ + Clustbuf *p; + + p = getclust0(dos, sector); + p->age = Ticks; + p->dos = dos; + p->sector = sector; + memset(p->iobuf, 0, p->size); + p->flags |= MOD; + chat("getclustz %lld\n", (Wideoff)sector); + return p; +} + +/* + * release an io buffer + */ +static void +putclust(Clustbuf *p) +{ + if(!(p->flags & LOCKED)) + panic("putclust lock"); + if((p->flags & (MOD|IMMED)) == (MOD|IMMED)) + writeclust(p); + p->flags &= ~LOCKED; + chat("putclust @ sector %lld...", (Wideoff)p->sector); +} + +/* + * walk the fat one level ( n is a current cluster number ). + * return the new cluster number or -1 if no more. 
+ */ +static long +fatwalk(Dos *dos, int n) +{ + uint k, sect; + Clustbuf *p; + int o; + + chat("fatwalk %d\n", n); + + if(n < 2 || n >= dos->fatclusters) + return -1; + + switch(dos->fatbits){ + case 12: + k = (3*n)/2; break; + case 16: + k = 2*n; break; + default: + return -1; + } + if(k >= dos->fatbytes) + panic("getfat"); + + sect = k/dos->sectbytes + dos->fataddr; + o = k%dos->sectbytes; + p = getclust(dos, sect); + if(p == 0) /* unreadable fat: same policy as fatwrite */ + panic("fatwalk getclust"); + k = p->iobuf[o++]; + if(o >= dos->sectbytes){ + /* the entry straddles a sector boundary */ + putclust(p); + p = getclust(dos, sect+1); + if(p == 0) + panic("fatwalk getclust"); + o = 0; + } + k |= p->iobuf[o]<<8; + putclust(p); + if(dos->fatbits == 12){ + if(n&1) + k >>= 4; + else + k &= 0xfff; + if(k >= 0xff8) + k |= 0xf000; + } + k = k < 0xfff8 ? k : -1; + chat("fatwalk %d -> %lud\n", n, k); + return k; +} + +/* + * write a value into each copy of the fat. + * n is the cluster whose entry is set and val the new entry; + * panics on a bad cluster number or an unreadable fat sector. + */ +static void +fatwrite(Dos *dos, int n, int val) +{ + Off k, sect; + Clustbuf *p; + int i, o; + + chat("fatwrite %d %d...", n, val); + + if(n < 2 || n >= dos->fatclusters) + panic("fatwrite n"); + + switch(dos->fatbits){ + case 12: + k = (3*n)/2; break; + case 16: + k = 2*n; break; + default: + panic("fatwrite fatbits"); + return; + } + if(k >= dos->fatbytes) + panic("fatwrite k"); + + /* the copies follow one another, fatbytes apart */ + for(i=0; i < dos->nfats; i++, k+=dos->fatbytes){ + sect = k/dos->sectbytes + dos->fataddr; + o = k%dos->sectbytes; + p = getclust(dos, sect); + if(p == 0) + panic("fatwrite getclust"); + switch(dos->fatbits){ + case 12: + if(n&1){ + p->iobuf[o] &= 0x0f; + p->iobuf[o++] |= val<<4; + }else + p->iobuf[o++] = val; + if(o >= dos->sectbytes){ + p->flags |= MOD; + putclust(p); + p = getclust(dos, sect+1); + if(p == 0) + panic("fatwrite getclust"); + o = 0; + } + if(n&1) + p->iobuf[o] = val>>4; + else{ + p->iobuf[o] &= 0xf0; + p->iobuf[o] |= (val>>8)&0x0f; + } + break; + case 16: + p->iobuf[o++] = val; + p->iobuf[o] = val>>8; + break; + } + p->flags |= MOD; + putclust(p); + } + chat("OK\n"); +} + +/* + * allocate a free cluster from the fat. 
+ */ +static int +fatalloc(Dos *dos) +{ + Clustbuf *p; + int n; + + n = dos->freeptr; + for(;;){ + if(fatwalk(dos, n) == 0) + break; + if(++n >= dos->fatclusters) + n = 2; + if(n == dos->freeptr) + return -1; + } + dos->freeptr = n+1; + if(dos->freeptr >= dos->fatclusters) + dos->freeptr = 2; + fatwrite(dos, n, 0xffff); + p = getclustz(dos, dos->dataaddr + (n-2)*dos->clustsize); + putclust(p); + return n; +} + +/* + * map a file's logical sector address to a physical sector address + */ +static long +fileaddr(Dosfile *fp, Off ltarget, Clustbuf *pdir) +{ + Dos *dos = fp->dos; + Dosdir *dp; + Off p; + + chat("fileaddr %8.8s %lld\n", fp->name, (Wideoff)ltarget); + /* + * root directory is contiguous and easy + */ + if(fp->pdir == 0){ + if(ltarget*dos->sectbytes >= dos->rootsize*sizeof(Dosdir)) + return -1; + p = dos->rootaddr + ltarget; + chat("fileaddr %lld -> %lld\n", (Wideoff)ltarget, (Wideoff)p); + return p; + } + if(fp->pstart == 0){ /* empty file */ + if(!pdir) + return -1; + p = fatalloc(dos); + if(p <= 0) + return -1; + chat("fileaddr initial alloc %lld\n", (Wideoff)p); + dp = (Dosdir *)(pdir->iobuf + fp->odir); + puttime(dp); + dp->start[0] = p; + dp->start[1] = p>>8; + pdir->flags |= MOD; + fp->pstart = p; + fp->pcurrent = p; + fp->lcurrent = 0; + } + /* + * anything else requires a walk through the fat + * [lp]current will point to the last cluster if we run off the end + */ + ltarget /= dos->clustsize; + if(fp->pcurrent == 0 || fp->lcurrent > ltarget){ + /* go back to the beginning */ + fp->lcurrent = 0; + fp->pcurrent = fp->pstart; + } + while(fp->lcurrent < ltarget){ + /* walk the fat */ + p = fatwalk(dos, fp->pcurrent); + if(p < 0){ + if(!pdir) + return -1; + p = fatalloc(dos); + if(p < 0){ + print("file system full\n"); + return -1; + } + fatwrite(dos, fp->pcurrent, p); + } + fp->pcurrent = p; + ++fp->lcurrent; + } + + /* + * clusters start at 2 instead of 0 (why? 
- presotto) + */ + p = dos->dataaddr + (fp->pcurrent-2)*dos->clustsize; + chat("fileaddr %lld -> %lld\n", (Wideoff)ltarget, (Wideoff)p); + return p; +} + +/* + * set up a dos file name: split from at the first '.' into a + * blank-padded, upper-cased 8 byte name and 3 byte ext. + * neither output is NUL terminated. + */ +static void +setname(char *name, char *ext, char *from) +{ + char *to; + + memset(name, ' ', 8); + memset(ext, ' ', 3); + + to = name; + for(; *from && to-name < 8; from++, to++){ + if(*from == '.'){ + from++; + break; + } + if(*from >= 'a' && *from <= 'z') + *to = *from + 'A' - 'a'; + else + *to = *from; + } + /* + * a base name of 8 or more characters stops the loop short + * of the '.'; drop the excess and the '.' itself so that the + * extension is not taken to be ".xx". + */ + if(to-name >= 8){ + while(*from && *from != '.') + from++; + if(*from == '.') + from++; + } + to = ext; + for(; *from && to-ext < 3; from++, to++){ + if(*from >= 'a' && *from <= 'z') + *to = *from + 'A' - 'a'; + else + *to = *from; + } + + chat("name is %8.8s %3.3s\n", name, ext); +} + +/* + * walk a directory returns + * -1 if something went wrong + * 0 if not found + * 1 if found + * on success fp is changed to describe the entry found. + */ +static int +doswalk(Dosfile *fp, char *name) +{ + char dname[8], dext[3]; + Clustbuf *p; + Dosdir *dp; + Off o, addr; + + chat("walk(%s)\n", name); + if((fp->attr & DOSDIR) == 0){ + chat("walking non-directory!\n"); + return -1; + } + + setname(dname, dext, name); + + fp->offset = 0; /* start at the beginning */ + for(;;){ + addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, 0); + if(addr < 0) + return 0; + p = getclust(fp->dos, addr); + if(p == 0) + return -1; + /* compare every directory entry in this block */ + for(o=0; o < p->size; o += sizeof(Dosdir)){ + dp = (Dosdir *)(p->iobuf + o); + chat("comparing to %8.8s.%3.3s\n", (char*)dp->name, (char*)dp->ext); + if(memcmp(dname, dp->name, sizeof(dp->name)) != 0) + continue; + if(memcmp(dext, dp->ext, sizeof(dp->ext)) == 0) + goto Found; + } + fp->offset += p->size; + putclust(p); + } + +Found: + fp->pdir = p->sector; + fp->odir = o; + putclust(p); + memmove(fp->name, dname, sizeof(fp->name)); + memmove(fp->ext, dext, sizeof(fp->ext)); + fp->attr = dp->attr; + fp->length = GLONG(dp->length); + fp->pstart = GSHORT(dp->start); + fp->pcurrent = 0; + fp->lcurrent = 0; + fp->offset = 0; + return 1; +} + +static void +bootdump(Dosboot *b) +{ + USED(b); + chat("magic: 0x%2.2x 0x%2.2x 0x%2.2x\n", 
+ b->magic[0], b->magic[1], b->magic[2]); + chat("version: \"%8.8s\"\n", (char*)b->version); + chat("sectbytes: %d\n", GSHORT(b->sectbytes)); + chat("allocsize: %d\n", b->clustsize); + chat("nresrv: %d\n", GSHORT(b->nresrv)); + chat("nfats: %d\n", b->nfats); + chat("rootsize: %d\n", GSHORT(b->rootsize)); + chat("volsize: %d\n", GSHORT(b->volsize)); + chat("mediadesc: 0x%2.2x\n", b->mediadesc); + chat("fatsize: %d\n", GSHORT(b->fatsize)); + chat("trksize: %d\n", GSHORT(b->trksize)); + chat("nheads: %d\n", GSHORT(b->nheads)); + chat("nhidden: %d\n", GLONG(b->nhidden)); + chat("bigvolsize: %d\n", GLONG(b->bigvolsize)); + chat("driveno: %d\n", b->driveno); + chat("reserved0: 0x%2.2x\n", b->reserved0); + chat("bootsig: 0x%2.2x\n", b->bootsig); + chat("volid: 0x%8.8x\n", GLONG(b->volid)); + chat("label: \"%11.11s\"\n", (char*)b->label); +} + +/* + * instructions that boot blocks can start with + */ +#define JMPSHORT 0xeb +#define JMPNEAR 0xe9 + +/* + * read dos file system properties + */ +int +dosinit(Dos *dos) +{ + Clustbuf *p; + Dospart *dp; + Dosboot *b; + int i; + + chat("dosinit()\n"); + /* defaults till we know better */ + dos->start = 0; + dos->sectbytes = 512; + dos->clustsize = 1; + dos->clustbytes = 512; + + /* get first sector */ + p = getclust(dos, 0); + if(p == 0){ + chat("can't read boot block\n"); + return -1; + } + p->dos = 0; + + /* if a hard disk format, look for an active partition */ + b = (Dosboot *)p->iobuf; + if(b->magic[0] != JMPNEAR && (b->magic[0] != JMPSHORT || b->magic[2] != 0x90)){ + /* is the 0x55 in error here? 
*/ + if(p->iobuf[0x1fe] != 0x55 || p->iobuf[0x1ff] != 0xaa){ + print("no dos file system or partition table\n"); + putclust(p); + return -1; + } + dp = (Dospart*)&p->iobuf[0x1be]; + for(i = 0; i < 4; i++, dp++) + if(dp->type && dp->flag == 0x80) + break; + if(i == 4){ + putclust(p); + return -1; + } + dos->start += GLONG(dp->start); + putclust(p); + p = getclust(dos, 0); + if(p == 0){ + /* no buffer was returned, so there is nothing to release */ + chat("can't read boot block\n"); + return -1; + } + p->dos = 0; + } + + b = (Dosboot *)p->iobuf; + if(b->magic[0] != JMPNEAR && (b->magic[0] != JMPSHORT || b->magic[2] != 0x90)){ + print("no dos file system\n"); + putclust(p); + return -1; + } + + bootdump(b);/**/ + + /* + * both values are used as divisors below and in the fat + * and cluster code; refuse a boot block that has them zero. + */ + if(GSHORT(b->sectbytes) == 0 || b->clustsize == 0){ + print("no dos file system\n"); + putclust(p); + return -1; + } + + /* + * determine the systems' wonderous properties + */ + dos->sectbytes = GSHORT(b->sectbytes); + dos->clustsize = b->clustsize; + dos->clustbytes = dos->sectbytes*dos->clustsize; + dos->nresrv = GSHORT(b->nresrv); + dos->nfats = b->nfats; + dos->rootsize = GSHORT(b->rootsize); + dos->volsize = GSHORT(b->volsize); + if(dos->volsize == 0) + dos->volsize = GLONG(b->bigvolsize); + dos->mediadesc = b->mediadesc; + dos->fatsize = GSHORT(b->fatsize); + dos->fatbytes = dos->sectbytes*dos->fatsize; + dos->fataddr = dos->nresrv; + dos->rootaddr = dos->fataddr + dos->nfats*dos->fatsize; + i = dos->rootsize*sizeof(Dosdir) + dos->sectbytes - 1; + i = i/dos->sectbytes; + dos->dataaddr = dos->rootaddr + i; + dos->fatclusters = 2+(dos->volsize - dos->dataaddr)/dos->clustsize; + if(dos->fatclusters < 4087) + dos->fatbits = 12; + else + dos->fatbits = 16; + dos->freeptr = 2; + putclust(p); + + /* + * set up the root; its name and extension are all blanks + */ + dos->root.dos = dos; + dos->root.pdir = 0; + dos->root.odir = 0; + memset(dos->root.name, ' ', 8); + memset(dos->root.ext, ' ', 3); + dos->root.attr = DOSDIR; + dos->root.length = dos->rootsize*sizeof(Dosdir); + dos->root.pstart = 0; + dos->root.lcurrent = 0; + dos->root.pcurrent = 0; + dos->root.offset = 0; + + syncclust(); + return 0; +} + +static char * +nextelem(char *path, char *elem) +{ + int 
i; + + while(*path == '/') + path++; + if(*path==0 || *path==' ') + return 0; + for(i=0; *path && *path != '/' && *path != ' '; i++){ + if(i >= NAMELEN){ + print("name component too long\n"); + return 0; + } + *elem++ = *path++; + } + *elem = 0; + return path; +} + +static void +puttime(Dosdir *d) +{ + Timet secs; + Rtc rtc; + ushort x; + + secs = rtctime(); + sec2rtc(secs, &rtc); + x = (rtc.hour<<11) | (rtc.min<<5) | (rtc.sec>>1); + d->time[0] = x; + d->time[1] = x>>8; + x = ((rtc.year-80)<<9) | ((rtc.mon+1)<<5) | rtc.mday; + d->date[0] = x; + d->date[1] = x>>8; +} + +Dosfile* +dosopen(Dos *dos, char *path, Dosfile *fp) +{ + char element[NAMELEN]; + + chat("dosopen(%s)\n", path); + *fp = dos->root; + while(path = nextelem(path, element)){ + switch(doswalk(fp, element)){ + case -1: + print("error walking to %s\n", element); + return 0; + case 0: + print("%s not found\n", element); + return 0; + case 1: + print("found %s attr 0x%ux start 0x%llux len %lld\n", + element, fp->attr, (Wideoff)fp->pstart, + (Wideoff)fp->length); + break; + } + } + + syncclust(); + return fp; +} + +/* + * read from a dos file + */ +long +dosread(Dosfile *fp, void *a, long n) +{ + Off addr, k, o; + Clustbuf *p; + uchar *to; + + chat("dosread(,,%ld)\n", n); + if((fp->attr & DOSDIR) == 0){ + if(fp->offset >= fp->length) + return 0; + if(fp->offset+n > fp->length) + n = fp->length - fp->offset; + } + to = a; + while(n > 0){ + /* + * read the data; sectors below dos->dataaddr + * are read one at a time. 
+ */ + addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, 0); + if(addr < 0) + return -1; + p = getclust(fp->dos, addr); + if(p == 0) + return -1; + /* + * copy the bytes we need + */ + o = fp->offset % p->size; + k = p->size - o; + if(k > n) + k = n; + memmove(to, p->iobuf+o, k); + putclust(p); + to += k; + fp->offset += k; + n -= k; + } + syncclust(); + return to - (uchar *)a; +} + +/* + * write to a dos file + */ +long +doswrite(Dosfile *fp, void *a, long n) +{ + Off blksize, addr, k, o; + Clustbuf *p, *pdir; + Dosdir *dp; + uchar *from; + + if(fp->attr & DOSDIR){ + print("write dir\n"); + return -1; + } + if(fp->pdir){ + pdir = getclust(fp->dos, fp->pdir); + /* + * should do consistency check if + * concurrent access is possible. + */ + if(pdir == 0) + panic("doswrite"); + }else + pdir = 0; + blksize = pdir ? fp->dos->clustbytes : fp->dos->sectbytes; + from = a; + while(n > 0){ + addr = fileaddr(fp, fp->offset/fp->dos->sectbytes, pdir); + if(addr < 0) + return -1; + o = fp->offset % blksize; + if(o == 0 && n >= blksize) + p = getclustz(fp->dos, addr); + else + p = getclust(fp->dos, addr); + if(p == 0) + return -1; + /* + * copy the bytes we need + */ + k = p->size - o; + if(k > n) + k = n; + memmove(p->iobuf+o, from, k); + p->flags |= MOD; + putclust(p); + from += k; + fp->offset += k; + n -= k; + } + if(pdir){ + dp = (Dosdir *)(pdir->iobuf + fp->odir); + puttime(dp); + if(fp->offset > fp->length){ + fp->length = fp->offset; + dp->length[0] = fp->length; + dp->length[1] = fp->length>>8; + dp->length[2] = fp->length>>16; + dp->length[3] = fp->length>>24; + } + pdir->flags |= MOD; + putclust(pdir); + } + syncclust(); + return from - (uchar *)a; +} + +/* + * truncate a dos file to zero length + */ +int +dostrunc(Dosfile *fp) +{ + Clustbuf *pdir; + Dosdir *dp; + Off p, np; + + if(fp->attr & DOSDIR){ + print("trunc dir\n"); + return -1; + } + pdir = getclust(fp->dos, fp->pdir); + if(pdir == 0) + panic("dostrunc"); + p = fatwalk(fp->dos, fp->pstart); + 
fatwrite(fp->dos, fp->pstart, 0xffff); + while(p >= 0){ + np = fatwalk(fp->dos, p); + fatwrite(fp->dos, p, 0); + p = np; + } + fp->length = 0; + dp = (Dosdir *)(pdir->iobuf + fp->odir); + puttime(dp); + dp->length[0] = 0; + dp->length[1] = 0; + dp->length[2] = 0; + dp->length[3] = 0; + pdir->flags |= MOD; + putclust(pdir); + syncclust(); + return 0; +} --- /sys/src/fs/amd64/dosfs.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/dosfs.h Tue Aug 27 16:10:50 2013 @@ -0,0 +1,126 @@ +typedef struct Dosboot Dosboot; +typedef struct Dos Dos; +typedef struct Dosdir Dosdir; +typedef struct Dosfile Dosfile; +typedef struct Dospart Dospart; + +struct Dospart +{ + uchar flag; /* active flag */ + uchar shead; /* starting head */ + uchar scs[2]; /* starting cylinder/sector */ + uchar type; /* partition type */ + uchar ehead; /* ending head */ + uchar ecs[2]; /* ending cylinder/sector */ + uchar start[4]; /* starting sector */ + uchar len[4]; /* length in sectors */ +}; + +struct Dosboot{ + uchar magic[3]; + uchar version[8]; + uchar sectbytes[2]; + uchar clustsize; + uchar nresrv[2]; + uchar nfats; + uchar rootsize[2]; + uchar volsize[2]; + uchar mediadesc; + uchar fatsize[2]; + uchar trksize[2]; + uchar nheads[2]; + uchar nhidden[4]; + uchar bigvolsize[4]; + uchar driveno; + uchar reserved0; + uchar bootsig; + uchar volid[4]; + uchar label[11]; + uchar reserved1[8]; +}; + +struct Dosfile{ + Dos * dos; /* owning dos file system */ + int pdir; /* sector containing directory entry */ + int odir; /* offset to same */ + char name[8]; + char ext[3]; + uchar attr; + Devsize length; + Devsize pstart; /* physical start cluster address */ + Devsize pcurrent; /* physical current cluster address */ + Devsize lcurrent; /* logical current cluster address */ + Devsize offset; +}; + +struct Dos{ + int dev; /* device id */ + Off (*read)(int, void*, long, Devsize); /* read routine */ + Off (*write)(int, void*, long, Devsize); /* write routine */ + + uvlong start; /* start of file system (sector 
no.) */ + int sectbytes; /* size of a sector */ + int clustsize; /* size of a cluster (in sectors) */ + int clustbytes; /* size of a cluster (in bytes) */ + int nresrv; /* sectors */ + int nfats; /* usually 2 */ + int rootsize; /* number of entries */ + int volsize; /* in sectors */ + int mediadesc; + int fatsize; /* size of a fat (in sectors) */ + int fatbytes; /* size of a fat (in bytes) */ + int fatclusters; /* no. of clusters governed by fat */ + int fatbits; /* 12 or 16 */ + Devsize fataddr; /* sector address of first fat */ + Devsize rootaddr; /* sector address of root directory */ + Devsize dataaddr; /* sector address of first data block */ + Devsize freeptr; /* for cluster allocation */ + + Dosfile root; +}; + +struct Dosdir{ + uchar name[8]; + uchar ext[3]; + uchar attr; + uchar reserved[10]; + uchar time[2]; + uchar date[2]; + uchar start[2]; + uchar length[4]; +}; + +enum{ + FAT12 = 0x01, + FAT16 = 0x04, + EXTEND = 0x05, + FATHUGE = 0x06, + FAT32 = 0x0b, + FAT32X = 0x0c, + EXTHUGE = 0x0f, + DMDDO = 0x54, + PLAN9 = 0x39, + LEXTEND = 0x85, +}; + +enum{ + DRONLY = 0x01, + DHIDDEN = 0x02, + DSYSTEM = 0x04, + DVLABEL = 0x08, + DOSDIR = 0x10, + DARCH = 0x20, +}; + +#define GSHORT(p) (((p)[1]<<8)|(p)[0]) +#define GLONG(p) ((GSHORT(p+2)<<16)|GSHORT(p)) +#define GLSHORT(p) (((p)[0]<<8)|(p)[1]) +#define GLLONG(p) ((GLSHORT(p)<<16)|GLSHORT(p+2)) + +extern int dosinit(Dos*); +extern Dosfile* dosopen(Dos*, char*, Dosfile*); +extern int dostrunc(Dosfile*); +extern long dosread(Dosfile*, void*, long); +extern long doswrite(Dosfile*, void*, long); + +extern Dos dos; --- /sys/src/fs/amd64/nvr.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/nvr.c Tue Aug 27 16:10:50 2013 @@ -0,0 +1,55 @@ +#include "all.h" +#include "io.h" +#include "ureg.h" + +#include "dosfs.h" + +static Dosfile file; +static int opened; +char nvrfile[128] = "plan9.nvr"; + +static void +nvopen(void) +{ + Mpl s; + Dosfile *fp; + + if(opened) + return; + opened = 1; + s = spllo(); + fp = dosopen(&dos, 
nvrfile, &file); + splx(s); + if(fp == 0) + panic("can't open %s\n", nvrfile); +} + +int +nvread(int offset, void *a, int n) +{ + int r; + Mpl s; + + nvopen(); + + s = spllo(); + file.offset = offset; + r = dosread(&file, a, n); + splx(s); + return r; +} + +int +nvwrite(int offset, void *a, int n) +{ + int r; + Mpl s; + + nvopen(); + + s = spllo(); + file.offset = offset; + r = doswrite(&file, a, n); + splx(s); + return r; +} --- /sys/src/fs/amd64/lock.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/lock.c Tue Aug 27 16:10:51 2013 @@ -0,0 +1,353 @@ + #include "all.h" + +#define deccnt(x) if(x) adec((int*)&x->nlock) +#define inccnt(x) if(x) ainc((int*)&x->nlock) + +/* + * print the lock:pc pairs recorded in up's lock stack, + * four to a line. + */ +void +printlocks(User *up) +{ + int i; + + for(i = 0; i < up->nlock; i++){ + print("%#p:%#p ", up->lstack[i], up->pstack[i]); + if((i%4) == 3) /* fourth entry on this line */ + print("\n"); + } + /* finish a partly filled last line */ + if(i>0 && i%4) + print("\n"); +} + +void +lock(Lock *l) +{ + int i, nl; + uintptr pc; + + pc = getcallerpc(&l); + nl = 0; + if(u) + nl = u->nlock; +loop: + inccnt(u); /* prevent being scheded */ + if(tas(l) == 0) { + l->pc = pc; + if(u){ + u->lstack[nl] = l; + u->pstack[nl] = pc; + } + return; + } + deccnt(u); + + for(i = 0; i < 1000000; i++){ + if(l->sbsem) + continue; + inccnt(u); + if(tas(l) == 0) { + l->pc = pc; + if(u){ + u->lstack[nl] = l; + u->pstack[nl] = pc; + } + return; + } + deccnt(u); + } + l->sbsem = 0; // BOTCH + + print("lock loop %d:%#p called by %#p held by pc %#p\n", u?u->pid:-1, l, pc, l->pc); + if(u) + dumpstack(u); + dotrace(0); + if(islo()) + sched(); + else + print("ilock deadlock\n"); + goto loop; +} + +void +unlock(Lock *l) +{ + if(l->sbsem == 0) + print("unlock: not locked: pc %#p\n", getcallerpc(&l)); + l->pc = 0; + l->sbsem = 0; + coherence(); + + if(u && adec((int*)&u->nlock) == 0) + if(u->delaysched) + if(islo()){ + /* + * Call sched if the need arose while locks were held + * But, don't do it from interrupt routines, hence the islo() test + */ + u->delaysched = 0; + sched(); + } +} + +int +canlock(Lock *l) +{ + 
inccnt(u); + if(tas(l)){ + deccnt(u); + return 0; + } + l->pc = getcallerpc(&l); + return 1; +} + +void +ilock(Lock *l) +{ + uintptr pc; + Mpl x; + + pc = getcallerpc(&l); + + x = splhi(); + if(tas(l) == 0) + goto acquire; + + if(!l->isilock) + panic("ilock: not ilock %#p", pc); + if(l->m == MACHP(m->machno)) + panic("ilock: deadlock cpu%d pc %#p lpc %#p\n", m->machno, pc, l->pc); + for(;;){ + splx(x); + while(l->sbsem) + ; + x = splhi(); + if(tas(l) == 0) + goto acquire; + } +acquire: +// m->ilockdepth++; +// if(u) +// u->lastilock = l; + l->sr = x; + l->pc = pc; + l->p = u; + l->isilock = 1; + l->m = MACHP(m->machno); +} + +void +iunlock(Lock *l) +{ + Mpl sr; + + if(l->sbsem == 0) + panic("iunlock nolock: pc %#p", getcallerpc(&l)); + if(l->isilock == 0) + print("iunlock lock: pc %#p held by %#p\n", getcallerpc(&l), l->pc); + if(islo()) + print("iunlock lo: %#p held by %#p\n", getcallerpc(&l), l->pc); + + sr = l->sr; + l->m = 0; + l->sbsem = 0; +// m->ilockdepth--; + + coherence(); + +// if(u) +// u->lastilock = 0; + splx(sr); +} + +void +qlock(QLock *q) +{ + User *p; + int i; + + lock(q); + if(!q->locked){ + q->locked = 1; + unlock(q); + goto out; + } + if(u) { + for(i=0; ihas.q[i] == q) { + print("circular qlock by %d at %#p (other %#p, %#p\n", + u->pid, getcallerpc(&q), u->has.pc[i], q->pc); + dumpstack(u); + break; + } + } + p = q->tail; + if(p == 0) + q->head = u; + else + p->qnext = u; + q->tail = u; + u->qnext = 0; + u->state = Queueing; + u->has.want = q; + unlock(q); + sched(); + u->has.want = 0; + +out: + if(u) { + for(i=0; ihas.q[i] == 0) { + u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&q); + return; + } + print("NHAS(%d) too small\n", NHAS); + } +} + +int +canqlock(QLock *q) +{ + int i; + + lock(q); + if(q->locked){ + unlock(q); + return 0; + } + q->locked = 1; + unlock(q); + + if(u){ + for(i=0; ihas.q[i] == 0) { + u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&q); + return 1; + } + print("NHAS(%d) too small\n", NHAS); + } + return 1; +} + +void 
+qunlock(QLock *q) +{ + User *p; + int i; + + lock(q); + p = q->head; + if(p) { + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + unlock(q); + ready(p); + } else { + q->locked = 0; + unlock(q); + } + + if(u){ + for(i=0; ihas.q[i] == q) { + u->has.q[i] = 0; + return; + } + panic("qunlock: not there %#p, called from %#p\n", + q, getcallerpc(&q)); + } +} + +/* + * readers/writers lock + * allows 1 writer or many readers + */ +void +rlock(RWlock *l) +{ + QLock *q; + + qlock(&l->wr); /* wait here for writers and exclusion */ + + q = &l->rd; /* first reader in, qlock(&l->rd) */ + lock(q); + q->locked = 1; + l->nread++; + unlock(q); + + qunlock(&l->wr); + + if(u){ + int i; + int found; + + found = 0; + for(i=0; ihas.q[i] == q){ + print("circular rlock by %d at %#p (other %#p)\n", + u->pid, getcallerpc(&l), u->has.pc[i]); + dumpstack(u); + } + if(!found && u->has.q[i] == 0) { + u->has.q[i] = q; + u->has.pc[i] = getcallerpc(&l); + found = 1; + } + } + if(!found) + print("NHAS(%d) too small\n", NHAS); + } +} + +void +runlock(RWlock *l) +{ + QLock *q; + User *p; + int n; + + q = &l->rd; + lock(q); + n = l->nread - 1; + l->nread = n; + if(n == 0) { /* last reader out, qunlock(&l->rd) */ + p = q->head; + if(p) { + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + unlock(q); + ready(p); + goto accounting; + } + q->locked = 0; + } + unlock(q); + +accounting: + if(u){ + int i; + for(i=0; ihas.q[i] == q) { + u->has.q[i] = 0; + return; + } + panic("runlock: not there %#p, called from %#p\n", + q, getcallerpc(&l)); + } +} + +void +wlock(RWlock *l) +{ + qlock(&l->wr); /* wait here for writers and exclusion */ + qlock(&l->rd); /* wait here for last reader */ +} + +void +wunlock(RWlock *l) +{ + qunlock(&l->rd); + qunlock(&l->wr); +} --- /sys/src/fs/amd64/etherif.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/etherif.c Tue Aug 27 16:10:51 2013 @@ -0,0 +1,381 @@ +#include "all.h" +#include "io.h" + +#include "../ip/ip.h" +#include "../dev/aoe.h" +#include "etherif.h" + 
+#define dprint(...)	/* print(__VA_ARGS__) */
+
+Ether etherif[MaxEther];
+int nether;
+
+void
+etheriq(Ether* ether, Msgbuf* mb)	/* append mb to ether's receive queue, then wakeup(&ether->rqr) */
+{
+	ilock(&ether->rqlock);
+	if(ether->rqhead)
+		ether->rqtail->next = mb;
+	else
+		ether->rqhead = mb;
+	ether->rqtail = mb;
+	mb->next = 0;
+	iunlock(&ether->rqlock);
+
+	wakeup(&ether->rqr);
+}
+
+static int
+isinput(void* arg)	/* sleep condition for etheri: receive queue is non-empty */
+{
+	return ((Ether*)arg)->rqhead != 0;
+}
+
+static void
+etheri(void)	/* input process: dequeue received packets and dispatch on the ether type field */
+{
+	Ether *ether;
+	Ifc *ifc;
+	Msgbuf *mb;
+	Enpkt *p;
+
+	ether = u->arg;
+	ifc = &ether->ifc;
+	print("ether%di: %E %I\n", ether->ctlrno, ether->ifc.ea, ether->ifc.ipa);
+	ether->attach(ether);
+
+	for(;;) {
+		sleep(&ether->rqr, isinput, ether);
+
+		ilock(&ether->rqlock);
+		if(ether->rqhead == 0) {
+			iunlock(&ether->rqlock);
+			continue;
+		}
+		mb = ether->rqhead;
+		ether->rqhead = mb->next;
+		iunlock(&ether->rqlock);
+
+		p = (Enpkt*)mb->data;
+		switch(nhgets(p->type)){
+		case Arptype:
+			arpreceive(p, mb->count, ifc);
+			break;
+		case Cectype:
+			cecreceive(p, mb->count, ifc);
+			break;
+		case Aoetype:
+			aoereceive(p, mb->count, ifc);
+			break;
+		case Iptype:
+			ipreceive(p, mb->count, ifc);
+			break;
+		default:
+			goto done;	/* unknown type: free without counting it */
+		}
+		ifc->rxpkt++;
+		ifc->work.count++;
+		ifc->rate.count += mb->count;
+	done:
+		mbfree(mb);
+	}
+}
+
+#ifdef no
+static void
+ethero(void)
+{
+	Ether *ether;
+	Ifc *ifc;
+	Msgbuf *mb;
+	int len;
+
+	ether = u->arg;
+	ifc = &ether->ifc;
+	print("ether%do: %E %I\n", ether->ctlrno, ifc->ea, ifc->ipa);
+
+	for(;;) {
+		mb = recv(ifc->reply, 1);
+		if(mb == nil)
+			continue;
+
+		len = mb->count;
+		if(len > ether->ifc.maxmtu){
+			print("ether%do: pkt too big - %d\n", ether->ctlrno, len);
+			mbfree(mb);
+			continue;
+		}
+		if(len < ETHERMINTU) {
+			memset(mb->data+len, 0, ETHERMINTU-len);
+			mb->count = len = ETHERMINTU;
+		}
+		memmove(((Enpkt*)(mb->data))->s, ifc->ea, sizeof(ifc->ea));
+
+		ilock(&ether->tqlock);
+		if(ether->tqhead)
+			ether->tqtail->next = mb;
+		else
+			ether->tqhead = mb;
+		ether->tqtail = mb;
+		mb->next = 0;
+		iunlock(&ether->tqlock);
+
+		ether->transmit(ether);
+
+		ifc->work.count++;
+		ifc->rate.count += len;
+		ifc->txpkt++;
+	}
+}
+
+Msgbuf*
+etheroq(Ether* ether)
+{
+	Msgbuf *mb;
+
+	mb = nil;
+	ilock(&ether->tqlock);
+	if(ether->tqhead){
+		mb = ether->tqhead;
+		ether->tqhead = mb->next;
+	}
+	iunlock(&ether->tqlock);
+
+	return mb;
+}
+#endif
+
+/*
+ * look, ma.  no extra queue.
+ */
+static void
+ethero(void)	/* output process: wait on the reply queue, then call the driver's transmit */
+{
+	Ether *e;
+
+	e = u->arg;
+	print("ether%do: %E %I\n", e->ctlrno, e->ifc.ea, e->ifc.ipa);
+
+	for(;;){
+		recv(e->ifc.reply, 0);		// wait for something to do.
+		e->transmit(e);
+	}
+}
+
+Msgbuf*
+etheroq(Ether* e)	/* next packet to send, padded to ETHERMINTU, source address filled in; 0 if none queued */
+{
+	Msgbuf *m;
+	Enpkt *p;
+	Ifc *f;
+	int len;
+
+	f = &e->ifc;
+loop:
+	if(f->reply->count == 0)
+		return 0;
+	m = recv(f->reply, 1);
+	len = m->count;
+	if(len > f->maxmtu){
+		print("ether%do: pkt too big - %d\n", e->ctlrno, len);
+		mbfree(m);
+		goto loop;
+	}
+	if(len < ETHERMINTU){
+		memset(m->data+len, 0, ETHERMINTU-len);
+		m->count = len = ETHERMINTU;
+	}
+	p = (Enpkt*)m->data;
+	memmove(p->s, f->ea, sizeof f->ea);
+
+	f->work.count++;
+	f->rate.count += len;
+	f->txpkt++;
+
+	return m;
+}
+
+Msgbuf*
+etheroq1(Ether* e, int ret)	/* as etheroq; with ret == 0 the empty-queue check is skipped */
+{
+	Msgbuf *m;
+	Enpkt *p;
+	Ifc *f;
+	int len;
+
+	f = &e->ifc;
+loop:
+	if(ret){
+		if(f->reply->count == 0)
+			return 0;
+	}
+	m = recv(f->reply, 1);
+	len = m->count;
+	if(len > f->maxmtu){
+		print("ether%do: pkt too big - %d\n", e->ctlrno, len);
+		mbfree(m);
+		goto loop;
+	}
+	if(len < ETHERMINTU){
+		memset(m->data+len, 0, ETHERMINTU-len);
+		m->count = len = ETHERMINTU;
+	}
+	p = (Enpkt*)m->data;
+	memmove(p->s, f->ea, sizeof f->ea);
+
+	f->work.count++;
+	f->rate.count += len;
+	f->txpkt++;
+
+	return m;
+}
+
+static void
+cmd_state(int, char*[])	/* "state" console command: print the counters of every live interface */
+{
+	int i;
+	Ifc *ifc;
+
+	for(i = 0; i < MaxEther; i++){	/* etherif[] slots can be sparse; nether only counts the live ones */
+		if(etherif[i].mbps == 0)
+			continue;
+
+		ifc = &etherif[i].ifc;
+		print("ether stats %d %E\n", etherif[i].ctlrno, etherif[i].ea);
+		print(" work =%9W pkts\n", &ifc->work);
+		print(" rate =%9W Bps\n", &ifc->rate);
+		print(" err = %3ld rc %3ld sum\n", ifc->rcverr, ifc->sumerr);
+	}
+}
+
+void
+etherstart(void)	/* start the i/o processes of each interface that has a valid ip address */
+{
+	int i;
+	Ifc *ifc, *tail;
+	char buf[100], *p;
+
+	nether = 0;
+	tail = 0;
+	for(i = 0; i < MaxEther; i++){
+		if(etherif[i].mbps == 0)
+			continue;
+
+		ifc = &etherif[i].ifc;
+		lock(ifc);
+		getipa(ifc, etherif[i].ctlrno);
+		if(!isvalidip(ifc->ipa)){
+			unlock(ifc);
+			etherif[i].mbps = 0;	/* marks the slot unused for later scans */
+			continue;
+		}
+		if(ifc->reply == 0){
+			dofilter(&ifc->work);
+			dofilter(&ifc->rate);
+			ifc->reply = newqueue(Nqueue);
+		}
+		unlock(ifc);
+
+		sprint(etherif[i].oname, "ether%do", etherif[i].ctlrno);
+		userinit(ethero, etherif+i, etherif[i].oname);
+		sprint(etherif[i].iname, "ether%di", etherif[i].ctlrno);
+		userinit(etheri, etherif+i, etherif[i].iname);
+
+		ifc->next = nil;
+		if(enets != nil)
+			tail->next = ifc;
+		else
+			enets = ifc;
+		tail = ifc;
+		nether++;
+	}
+
+	if(nether){
+		cmd_install("state", "-- ether stats", cmd_state);
+		arpstart();
+		if(p = getconf("route")){
+			snprint(buf, sizeof buf, "route %s", p);
+			cmd_exec(buf);
+		}
+	}
+}
+
+static int
+parseether(uchar *to, char *from)	/* parse 6 hex byte pairs, optionally ':'-separated, into to; -1 if from runs out */
+{
+	char nip[4];
+	char *p;
+	int i;
+
+	p = from;
+	while(*p == ' ')
+		++p;
+	for(i = 0; i < 6; i++){
+		if(*p == 0)
+			return -1;
+		nip[0] = *p++;
+		if(*p == 0)
+			return -1;
+		nip[1] = *p++;
+		nip[2] = 0;
+		to[i] = strtoul(nip, 0, 16);
+		if(*p == ':')
+			p++;
+	}
+	return 0;
+}
+
+int
+ethercfgmatch(Ether *e, Pcidev *p, uintmem port)	/* 0 if e's port and tbdf, where set, match; else -1 */
+{
+	if(e->port == 0 || e->port == port)
+	if(e->tbdf == BUSUNKNOWN || p == nil || e->tbdf == p->tbdf){
+		return 0;
+	}
+	return -1;
+}
+
+void
+etherinit(void)	/* for each configured ether%d, reset the controller whose type matches and enable its interrupt */
+{
+	char buf[32];
+	int i, n, ctlrno;
+	Ether *e;
+
+	for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		e = etherif+ctlrno;
+		memset(e, 0, sizeof *e);
+		if(!pciconfig("ether", ctlrno, e))
+			continue;
+		for(n = 0; n < netherctlr; n++){
+			if(cistrcmp(etherctlr[n].type, e->type))
+				continue;
+			dprint("FOUND ether %s\n", etherctlr[n].type);
+			e->ctlrno = ctlrno;
+			e->tbdf = BUSUNKNOWN;
+			e->ifc.maxmtu = ETHERMAXTU;
+			for(i = 0; i < e->nopt; i++){
+				if(strncmp(e->opt[i], "ea=", 3))
+					continue;
+				if(parseether(e->ea, &e->opt[i][3]) == -1)
+					memset(e->ea, 0, Easize);
+			}
+			dprint(" reset ... ");
+			if(etherctlr[n].reset(e)){
+				dprint("fail\n");
+				break;
+			}
+			dprint("okay\n");
+			if(e->irq == 2)
+				e->irq = 9;
+			memmove(e->ifc.ea, e->ea, sizeof e->ea);
+			snprint(buf, sizeof buf, "ether%d", ctlrno);
+			intrenable(e->irq, e->interrupt, e, e->tbdf, buf);
+
+			print("ether%d: %s: %dMbps port %#p irq %d mtu %d",
+				ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu);
+			print(": %E\n", e->ea);
+			break;
+		}
+	}
+}
--- /sys/src/fs/amd64/ether82563.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/fs/amd64/ether82563.c	Tue Aug 27 16:10:53 2013
@@ -0,0 +1,1737 @@
+/*
+ * Intel 8256[367], 8257[1-9], 8258[03], i21[01], i350
+ *	Gigabit Ethernet PCI-Express Controllers
+ * Coraid EtherDrive® hba
+ */
+#include "all.h"
+#include "io.h"
+#include "../ip/ip.h"
+#include "etherif.h"
+
+/* this is pretty gross */
+#define Block	Msgbuf
+#define freeb(b)	mbfree(b)
+#define BLEN(b)	((b)->count)
+#define rp	data
+#define iprint(...)	print(__VA_ARGS__)
+
+/*
+ * note: the 82575, 82576 and 82580 are operated using registers aliased
+ * to the 82563-style architecture.  many features seen in the 82598
+ * are also seen in the 82575 part.
+ */ + +enum { + /* General */ + + Ctrl = 0x0000, /* Device Control */ + Status = 0x0008, /* Device Status */ + Eec = 0x0010, /* EEPROM/Flash Control/Data */ + Eerd = 0x0014, /* EEPROM Read */ + Ctrlext = 0x0018, /* Extended Device Control */ + Fla = 0x001c, /* Flash Access */ + Mdic = 0x0020, /* MDI Control */ + Fcal = 0x0028, /* Flow Control Address Low */ + Fcah = 0x002C, /* Flow Control Address High */ + Fct = 0x0030, /* Flow Control Type */ + Kumctrlsta = 0x0034, /* Kumeran Control and Status Register */ + Connsw = 0x0034, /* copper / fiber switch control; 82575/82576 */ + Vet = 0x0038, /* VLAN EtherType */ + Fcttv = 0x0170, /* Flow Control Transmit Timer Value */ + Txcw = 0x0178, /* Transmit Configuration Word */ + Rxcw = 0x0180, /* Receive Configuration Word */ + Ledctl = 0x0E00, /* LED control */ + Pba = 0x1000, /* Packet Buffer Allocation */ + Pbs = 0x1008, /* Packet Buffer Size */ + + /* Interrupt */ + + Icr = 0x00C0, /* Interrupt Cause Read */ + Itr = 0x00c4, /* Interrupt Throttling Rate */ + Ics = 0x00C8, /* Interrupt Cause Set */ + Ims = 0x00D0, /* Interrupt Mask Set/Read */ + Imc = 0x00D8, /* Interrupt mask Clear */ + Iam = 0x00E0, /* Interrupt acknowledge Auto Mask */ + Eitr = 0x1680, /* Extended itr; 82575/6 80 only */ + + /* Receive */ + + Rctl = 0x0100, /* Control */ + Ert = 0x2008, /* Early Receive Threshold (573[EVL], 82578 only) */ + Fcrtl = 0x2160, /* Flow Control RX Threshold Low */ + Fcrth = 0x2168, /* Flow Control Rx Threshold High */ + Psrctl = 0x2170, /* Packet Split Receive Control */ + Drxmxod = 0x2540, /* dma max outstanding bytes (82575) */ + Rdbal = 0x2800, /* Rdesc Base Address Low Queue 0 */ + Rdbah = 0x2804, /* Rdesc Base Address High Queue 0 */ + Rdlen = 0x2808, /* Descriptor Length Queue 0 */ + Srrctl = 0x280c, /* split and replication rx control (82575) */ + Rdh = 0x2810, /* Descriptor Head Queue 0 */ + Rdt = 0x2818, /* Descriptor Tail Queue 0 */ + Rdtr = 0x2820, /* Descriptor Timer Ring */ + Rxdctl = 0x2828, /* Descriptor 
Control */ + Radv = 0x282C, /* Interrupt Absolute Delay Timer */ + Rsrpd = 0x2c00, /* Small Packet Detect */ + Raid = 0x2c08, /* ACK interrupt delay */ + Cpuvec = 0x2c10, /* CPU Vector */ + Rxcsum = 0x5000, /* Checksum Control */ + Rmpl = 0x5004, /* rx maximum packet length (82575) */ + Rfctl = 0x5008, /* Filter Control */ + Mta = 0x5200, /* Multicast Table Array */ + Ral = 0x5400, /* Receive Address Low */ + Rah = 0x5404, /* Receive Address High */ + Vfta = 0x5600, /* VLAN Filter Table Array */ + Mrqc = 0x5818, /* Multiple Receive Queues Command */ + + /* Transmit */ + + Tctl = 0x0400, /* Transmit Control */ + Tipg = 0x0410, /* Transmit IPG */ + Tkabgtxd = 0x3004, /* glci afe band gap transmit ref data, or something */ + Tdbal = 0x3800, /* Tdesc Base Address Low */ + Tdbah = 0x3804, /* Tdesc Base Address High */ + Tdlen = 0x3808, /* Descriptor Length */ + Tdh = 0x3810, /* Descriptor Head */ + Tdt = 0x3818, /* Descriptor Tail */ + Tidv = 0x3820, /* Interrupt Delay Value */ + Txdctl = 0x3828, /* Descriptor Control */ + Tadv = 0x382C, /* Interrupt Absolute Delay Timer */ + Tarc0 = 0x3840, /* Arbitration Counter Queue 0 */ + + /* Statistics */ + + Statistics = 0x4000, /* Start of Statistics Area */ + Gorcl = 0x88/4, /* Good Octets Received Count */ + Gotcl = 0x90/4, /* Good Octets Transmitted Count */ + Torl = 0xC0/4, /* Total Octets Received */ + Totl = 0xC8/4, /* Total Octets Transmitted */ + Nstatistics = 0x124/4, +}; + +enum { /* Ctrl */ + Lrst = 1<<3, /* link reset */ + Slu = 1<<6, /* Set Link Up */ + Devrst = 1<<26, /* Device Reset */ + Rfce = 1<<27, /* Receive Flow Control Enable */ + Tfce = 1<<28, /* Transmit Flow Control Enable */ + Phyrst = 1<<31, /* Phy Reset */ +}; + +enum { /* Status */ + Lu = 1<<1, /* Link Up */ + Lanid = 3<<2, /* mask for Lan ID. 
*/ + Txoff = 1<<4, /* Transmission Paused */ + Tbimode = 1<<5, /* TBI Mode Indication */ + Phyra = 1<<10, /* PHY Reset Asserted */ + GIOme = 1<<19, /* GIO Master Enable Status */ +}; + +enum { + /* Eec */ + Nvpres = 1<<8, /* nvram present */ + Autord = 1<<9, /* autoread complete */ + Sec1val = 1<<22, /* sector 1 valid (!sec0) */ +}; + +enum { /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ +}; + +enum { /* Ctrlext */ + Eerst = 1<<13, /* EEPROM Reset */ + Linkmode = 3<<22, /* linkmode */ + Internalphy = 0<<22, /* " internal phy (copper) */ + Sgmii = 2<<22, /* " sgmii */ + Serdes = 3<<22, /* " serdes */ +}; + +enum { + /* Connsw */ + Enrgirq = 1<<2, /* interrupt on power detect (enrgsrc) */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* phy interface */ + Phyctl = 0, /* phy ctl register */ + Phyisr = 19, /* 82563 phy interrupt status register */ + Phylhr = 19, /* 8257[12] link health register */ + Physsr = 17, /* phy secondary status register */ + Phyprst = 193<<8 | 17, /* 8256[34] phy port reset */ + Phyier = 18, /* 82573 phy interrupt enable register */ + Phypage = 22, /* 8256[34] page register */ + Phystat = 26, /* 82580 phy status */ + Phyapage = 29, + Phy79page = 31, /* 82579 phy page register (all pages) */ + + Rtlink = 1<<10, /* realtime link status */ + Phyan = 1<<11, /* phy has autonegotiated */ + + /* Phyctl bits */ + Ran = 1<<9, /* restart auto negotiation */ + Ean = 1<<12, /* enable auto negotiation */ + + /* Phyprst bits */ + Prst = 1<<0, /* reset the port 
*/ + + /* 82573 Phyier bits */ + Lscie = 1<<10, /* link status changed ie */ + Ancie = 1<<11, /* auto negotiation complete ie */ + Spdie = 1<<14, /* speed changed ie */ + Panie = 1<<15, /* phy auto negotiation error ie */ + + /* Phylhr/Phyisr bits */ + Anf = 1<<6, /* lhr: auto negotiation fault */ + Ane = 1<<15, /* isr: auto negotiation error */ + + /* 82580 Phystat bits */ + Ans = 1<<14 | 1<<15, /* 82580 autoneg. status */ + Link = 1<<6, /* 82580 Link */ + + /* Rxcw builtin serdes */ + Anc = 1<<31, + Rxsynch = 1<<30, + Rxcfg = 1<<29, + Rxcfgch = 1<<28, + Rxcfgbad = 1<<27, + Rxnc = 1<<26, + + /* Txcw */ + Txane = 1<<31, + Txcfg = 1<<30, +}; + +enum { /* fiber (pcs) interface */ + Pcsctl = 0x4208, /* pcs control */ + Pcsstat = 0x420c, /* pcs status */ + + /* Pcsctl bits */ + Pan = 1<<16, /* autonegotiate */ + Prestart = 1<<17, /* restart an (self clearing) */ + + /* Pcsstat bits */ + Linkok = 1<<0, /* link is okay */ + Andone = 1<<16, /* an phase is done see below for success */ + Anbad = 1<<19 | 1<<20, /* Anerror | Anremfault */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt; !82575/6/80 only */ + Rxdw = 0x00000080, /* Rdesc write back; 82575/6/80 only */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfgset = 0x00000400, /* Receiving /C/ ordered sets */ + Ack = 0x00020000, /* Receive ACK frame */ + Omed = 1<<20, /* media change; pcs interface */ +}; + +enum { /* Txcw */ + TxcwFd = 0x00000020, /* Full Duplex */ + TxcwHd = 0x00000040, /* Half Duplex */ + TxcwPauseMASK = 0x00000180, /* Pause */ + TxcwPauseSHIFT = 7, + TxcwPs = 1<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr 
*i82563ctlr; +static Rbpool rbtab[Npool]; + +static char *statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-mtu Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + "Management Packets Rx", + "Management Packets Drop", + "Management Packets Tx", + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-mtu Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", + "Interrupt Assertion", + "Interrupt Rx Pkt Timer", + "Interrupt Rx Abs Timer", + "Interrupt Tx Pkt Timer", + "Interrupt Tx Abs Timer", + "Interrupt Tx Queue Empty", + "Interrupt Tx Desc Low", + "Interrupt Rx Min", + "Interrupt Rx Overrun", +}; + +static char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} 
+ +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Block* +i82563rballoc(Rbpool *p) +{ + Block *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + b->flags &= ~FREE; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Block *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->flags |= FREE; + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(0) + iprint("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Block *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Block *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Block *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Block *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Block *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Block *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Block *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Block *b) +{ + rbfree(b, 7); +} + +static void +rbfree8(Block *b) +{ + rbfree(b, 8); +} + +static void +rbfree9(Block *b) +{ + rbfree(b, 9); +} + +static Freefn freetab[Npool] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + rbfree6, + rbfree7, + rbfree8, + rbfree9, +}; + +static int +newpool(void) +{ + static int seq; + + if(seq == nelem(freetab)) + return -1; + if(freetab[seq] == nil){ + print("82563: bad freetab\n"); + return -1; + } + return seq++; +} + +static void +i82563im(Ctlr *ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static void +i82563txinit(Ctlr *ctlr) +{ + int i; + u32int r; + Block *b; + + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Tctl, 0x0F<tdba)); + csr32w(ctlr, Tdbah, 
Pciwaddrh(ctlr->tdba)); + csr32w(ctlr, Tdlen, ctlr->ntd * sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + for(i = 0; i < ctlr->ntd; i++){ + if((b = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(b); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + csr32w(ctlr, Tidv, 128); + csr32w(ctlr, Tadv, 64); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) | Ten); + r = csr32r(ctlr, Txdctl) & ~WthreshMASK; + r |= 4<type].flag & F75) + r |= Enable; + csr32w(ctlr, Txdctl, r); +} + +#define Next(x, m) (((x)+1) & (m)) + +static int +i82563cleanup(Ether *e) +{ + Block *b; + Ctlr *c; + int tdh, m, n; + + c = e->ctlr; + tdh = c->tdh; + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status & Tdd){ + tdh = n; + if((b = c->tb[tdh]) != nil){ + c->tb[tdh] = nil; + freeb(b); + }else + iprint("#l%d: %s tx underrun! %d\n", e->ctlrno, cname(c), n); + c->tdba[tdh].status = 0; + } + + return c->tdh = tdh; +} + +static int +notrim(void *v) +{ + Ctlr *c; + + c = v; + return (c->im & Txdw) == 0; +} + +static void +i82563tproc(void) +{ + Td *td; + Block *bp; + Ether *edev; + Ctlr *ctlr; + int tdh, tdt, m; + + edev = u->arg; + ctlr = edev->ctlr; + tdt = ctlr->tdt; + m = ctlr->ntd-1; + + for(;;){ + tdh = i82563cleanup(edev); + + if(Next(tdt, m) == tdh){ + ctlr->txdw++; + i82563im(ctlr, Txdw); + sleep(&ctlr->trendez, notrim, ctlr); + continue; + } +Msgbuf* etheroq1(Ether*, int); + bp = etheroq1(edev, 0); + td = &ctlr->tdba[tdt]; + td->addr[0] = Pciwaddrl(bp->rp); + td->addr[1] = Pciwaddrh(bp->rp); + td->control = Ide|Rs|Ifcs|Teop|BLEN(bp); + ctlr->tb[tdt] = bp; + tdt = Next(tdt, m); +extern void sfence(void); + sfence(); + csr32w(ctlr, Tdt, tdt); + } +} + +static int +i82563replenish(Ctlr *ctlr, int maysleep) +{ + uint rdt, m, i; + Block *bp; + Rbpool *p; + Rd *rd; + + rdt = ctlr->rdt; + m = ctlr->nrd-1; + p = rbtab + ctlr->pool; + i = 0; + for(; Next(rdt, m) != ctlr->rdh; rdt = Next(rdt, m)){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] 
!= nil){ + iprint("%s: tx overrun\n", cname(ctlr)); + break; + } + redux: + bp = i82563rballoc(p); + if(bp == nil){ + if(rdt - ctlr->rdh >= 16) + break; + print("%s: pool %d: no rx buffers\n", cname(ctlr), ctlr->pool); + if(maysleep == 0) + return -1; + ilock(p); + p->starve = 1; + iunlock(p); + sleep(p, icansleep, p); + goto redux; + } + i++; + ctlr->rb[rdt] = bp; + rd->addr[0] = Pciwaddrl(bp->rp); + rd->addr[1] = Pciwaddrh(bp->rp); + rd->status = 0; + ctlr->rdfree++; + } + if(i != 0){ + ctlr->rdt = rdt; + csr32w(ctlr, Rdt, rdt); + } + return 0; +} + +static void +i82563rxinit(Ctlr *ctlr) +{ + int i; + Block *bp; + + if(ctlr->rbsz <= 2048) + csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF); + else{ + i = ctlr->rbsz / 1024; + if(ctlr->rbsz % 1024) + i++; + if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rctl, Lpe|Dpf|Bsize2048|Bam|RdtmsHALF|Secrc); + if(ctlr->type != i82575) + i |= (ctlr->nrd/2>>4)<<20; /* RdmsHalf */ + csr32w(ctlr, Srrctl, i | Dropen); + csr32w(ctlr, Rmpl, ctlr->rbsz); +// csr32w(ctlr, Drxmxod, 0x7ff); + }else + csr32w(ctlr, Rctl, Lpe|Dpf|BsizeFlex*i|Bam|RdtmsHALF|Secrc); + } + + if(cttab[ctlr->type].flag & Fert) + csr32w(ctlr, Ert, 1024/8); + + if(ctlr->type == i82566) + csr32w(ctlr, Pbs, 16); + + csr32w(ctlr, Rdbal, Pciwaddrl(ctlr->rdba)); + csr32w(ctlr, Rdbah, Pciwaddrh(ctlr->rdba)); + csr32w(ctlr, Rdlen, ctlr->nrd * sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 0; //25; + ctlr->radv = 0; //500; + csr32w(ctlr, Rdtr, ctlr->rdtr); + csr32w(ctlr, Radv, ctlr->radv); + + for(i = 0; i < ctlr->nrd; i++) + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Rxdctl, 1<rim != 0; +} + +static void +i82563rproc(void) +{ + uint m, rdh, rim, im; + Block *bp; + Ctlr *ctlr; + Ether *edev; + Rd *rd; + + edev = u->arg; + ctlr = edev->ctlr; + + i82563rxinit(ctlr); + csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren); + 
if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable); + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + }else + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + m = ctlr->nrd-1; + + for(;;){ + i82563im(ctlr, im); + ctlr->rsleep++; + i82563replenish(ctlr, 1); + sleep(&ctlr->rrendez, i82563rim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + rim = ctlr->rim; + ctlr->rim = 0; + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + bp = ctlr->rb[rdh]; + if((rd->status & Reop) && rd->errors == 0){ + bp->count = rd->length; + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flags |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). 
+ */ + ctlr->tcpcs++; + bp->flags |= Btcpck|Budpck; + } + bp->flags |= Bpktck; + } + etheriq(edev, bp); + } else + freeb(bp); + ctlr->rb[rdh] = nil; + rd->status = 0; + ctlr->rdfree--; + ctlr->rdh = rdh = Next(rdh, m); + if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0)) + if(i82563replenish(ctlr, 0) == -1) + break; + } + } +} + +static int +i82563lim(void *v) +{ + return ((Ctlr*)v)->lim != 0; +} + +static int speedtab[] = { + 10, 100, 1000, 0 +}; + +static uint phywrite0(Ctlr*, int, int, ushort); + +static uint +setpage(Ctlr *c, uint phyno, uint p, uint r) +{ + uint pr; + + switch(c->type){ + case i82563: + if(r >= 16 && r <= 28 && r != 22) + pr = Phypage; + else if(r == 30 || r == 31) + pr = Phyapage; + else + return 0; + return phywrite0(c, phyno, pr, p); + case i82579: + return phywrite0(c, phyno, Phy79page, p<<5); + default: + if(p == 0) + return 0; + return ~0; + } +} + +static uint +phyread0(Ctlr *c, int phyno, int reg) +{ + uint phy, i; + + csr32w(c, Mdic, MDIrop | phyno<type].name, phyno, phy); + return ~0; + } + return phy & 0xffff; +} + +static uint +phyread(Ctlr *c, uint phyno, uint reg) +{ + if(setpage(c, phyno, reg>>8, reg & 0xff) == ~0){ + print("%s: phyread: bad phy page %d\n", cname(c), reg>>8); + return ~0; + } + return phyread0(c, phyno, reg & 0xff); +} + +static uint +phywrite0(Ctlr *c, int phyno, int reg, ushort val) +{ + uint phy, i; + + csr32w(c, Mdic, MDIwop | phyno<>8, reg & 0xff) == ~0) + panic("%s: bad phy reg %.4ux", cname(c), reg); + return phywrite0(c, phyno, reg & 0xff, v); +} + +static void +phyerrata(Ether *e, Ctlr *c, uint phyno) +{ + if(e->mbps == 0) + if(c->phyerrata == 0){ + c->phyerrata++; + phywrite(c, phyno, Phyprst, Prst); /* try a port reset */ + print("%s: phy port reset\n", cname(c)); + } + else + c->phyerrata = 0; +} + +static void +i82563attach(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + qlock(&ctlr->alock); + if(ctlr->alloc != nil){ + qunlock(&ctlr->alock); + return; + } + + ctlr->nrd = Nrd; + ctlr->ntd = 
Ntd; + ctlr->alloc = ialloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 255, 0); + if(ctlr->alloc == nil){ + qunlock(&ctlr->alock); + panic("i82563: no memory"); + } + ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 256); + ctlr->tdba = (Td*)(ctlr->rdba + ctlr->nrd); + + ctlr->rb = ialloc(ctlr->nrd * sizeof(Block*), 0); + ctlr->tb = ialloc(ctlr->ntd * sizeof(Block*), 0); + + mballocpool(Nrb, ctlr->rbsz, Rbalign, Mbeth82563, freetab[ctlr->pool]); + + snprint(ctlr->rname, sizeof ctlr->rname, "#l%dr", edev->ctlrno); + userinit(i82563rproc, edev, ctlr->rname); + + i82563txinit(ctlr); + snprint(ctlr->tname, sizeof ctlr->tname, "#l%dt", edev->ctlrno); + userinit(i82563tproc, edev, ctlr->tname); + + qunlock(&ctlr->alock); +} + +static void +i82563interrupt(Ureg*, void *arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + + while(icr = csr32r(ctlr, Icr) & ctlr->im){ + if(icr & (Lsc | Omed)){ + im &= ~(Lsc | Omed); + ctlr->lim = icr & (Lsc | Omed); + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack)){ + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + ctlr->tintr++; + wakeup(&ctlr->trendez); + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); +} + +static int +i82563detach(Ctlr *ctlr) +{ + int r, timeo; + + /* balance rx/tx packet buffer; survives reset */ + if(ctlr->rbsz > 8192 && cttab[ctlr->type].flag & Fpba){ + ctlr->pba = csr32r(ctlr, Pba); + r = ctlr->pba >> 16; + r += ctlr->pba & 0xffff; + r >>= 1; + csr32w(ctlr, Pba, r); + }else if(ctlr->type == i82573 && ctlr->rbsz > 1514) + csr32w(ctlr, Pba, 14); + ctlr->pba = csr32r(ctlr, Pba); + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal 
registers. + */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) & ~Ten); + + delay(10); + + r = csr32r(ctlr, Ctrl); + if(ctlr->type == i82566 || ctlr->type == i82579) + r |= Phyrst; + csr32w(ctlr, Ctrl, Devrst | r); + delay(1); + for(timeo = 0;; timeo++){ + if((csr32r(ctlr, Ctrl) & (Devrst|Phyrst)) == 0) + break; + if(timeo >= 1000) + return -1; + delay(1); + } + + r = csr32r(ctlr, Ctrl); + csr32w(ctlr, Ctrl, Slu|r); + + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if((csr32r(ctlr, Icr) & ~Rxcfg) == 0) + break; + delay(1); + } + if(csr32r(ctlr, Icr) & ~Rxcfg) + return -1; + + return 0; +} + +static void +i82563shutdown(Ether *edev) +{ + i82563detach(edev->ctlr); +} + +static ushort +eeread(Ctlr *ctlr, int adr) +{ + csr32w(ctlr, Eerd, EEstart | adr << 2); + while ((csr32r(ctlr, Eerd) & EEdone) == 0) + ; + return csr32r(ctlr, Eerd) >> 16; +} + +static int +eeload(Ctlr *ctlr) +{ + u16int sum; + int data, adr; + + sum = 0; + for (adr = 0; adr < 0x40; adr++) { + data = eeread(ctlr, adr); + ctlr->eeprom[adr] = data; + sum += data; + } + return sum; +} + +static int +fcycle(Ctlr*, Flash *f) +{ + u16int s, i; + + s = f->reg[Fsts]; + if((s&Fvalid) == 0) + return -1; + f->reg[Fsts] |= Fcerr | Ael; + for(i = 0; i < 10; i++){ + if((s&Scip) == 0) + return 0; + delay(1); + s = f->reg[Fsts]; + } + return -1; +} + +static int +fread(Ctlr *c, Flash *f, int ladr) +{ + u16int s; + + delay(1); + if(fcycle(c, f) == -1) + return -1; + f->reg[Fsts] |= Fdone; + f->reg32[Faddr] = ladr; + + /* setup flash control register */ + s = f->reg[Fctl] & ~0x3ff; + f->reg[Fctl] = s | 1<<8 | Fgo; /* 2 byte read */ + + while((f->reg[Fsts] & Fdone) == 0) + ; + if(f->reg[Fsts] & (Fcerr|Ael)) + return -1; 
+ return f->reg32[Fdata] & 0xffff; +} + +static int +fload(Ctlr *c) /* read the 0x40-word eeprom image through the flash window mapped from pci bar 1 into c->eeprom; returns the 16-bit sum of the words, or -1 if the window can't be mapped or a read fails */ +{ + uint data, io, r, adr; + u16int sum; + Flash f; + + io = c->pcidev->mem[1].bar & ~0x0f; + f.reg = vmap(io, c->pcidev->mem[1].size); + if(f.reg == nil) + return -1; + f.reg32 = (u32int*)f.reg; + f.base = f.reg32[Bfpr] & 0x1fff; + f.lim = f.reg32[Bfpr]>>16 & 0x1fff; + if(csr32r(c, Eec) & Sec1val) + f.base += f.lim+1 - f.base >> 1; + r = f.base << 12; + sum = 0; + for(adr = 0; adr < 0x40; adr++) { + data = fread(c, &f, r + adr*2); + if(data == -1) + break; /* failed read: leave the loop so the mapping is still released below */ + c->eeprom[adr] = data; + sum += data; + } + vunmap(f.reg, c->pcidev->mem[1].size); + return adr < 0x40? -1: sum; /* leaving the loop early means a read failed */ +} + +static void +defaultea(Ctlr *ctlr, uchar *ra) +{ + uint i, r; + uvlong u; + static uchar nilea[Eaddrlen]; + + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + if(cttab[ctlr->type].flag & Fflashea){ + /* intel mb bug */ + u = (uvlong)csr32r(ctlr, Rah)<<32u | (uint)csr32r(ctlr, Ral); + for(i = 0; i < Eaddrlen; i++) + ra[i] = u >> 8*i; + } + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + for(i = 0; i < Eaddrlen/2; i++){ + ra[2*i] = ctlr->eeprom[Ea+i]; + ra[2*i+1] = ctlr->eeprom[Ea+i] >> 8; + } + r = (csr32r(ctlr, Status) & Lanid) >> 2; + ra[5] += r; /* ea ctlr[n] = ea ctlr[0]+n */ +} + +static int +reset(Ctlr *ctlr) +{ + uchar *ra; + int i, r; + + if(i82563detach(ctlr)) + return -1; + if(cttab[ctlr->type].flag & Fload) + r = fload(ctlr); + else + r = eeload(ctlr); + if(r != 0 && r != 0xbaba){ + print("%s: bad eeprom checksum - %#.4ux\n", + cname(ctlr), r); + return -1; + } + + ra = ctlr->ra; + defaultea(ctlr, ra); + csr32w(ctlr, Ral, ra[3]<<24 | ra[2]<<16 | ra[1]<<8 | ra[0]); + csr32w(ctlr, Rah, 1<<31 | ra[5]<<8 | ra[4]); + for(i = 1; i < 16; i++){ + csr32w(ctlr, Ral+i*8, 0); + csr32w(ctlr, Rah+i*8, 0); + } + memset(ctlr->mta, 0, sizeof(ctlr->mta)); + for(i = 0; i < 128; i++) + csr32w(ctlr, Mta + i*4, 0); + csr32w(ctlr, Fcal, 0x00C28001); + csr32w(ctlr, Fcah, 0x0100); + if(ctlr->type != i82579 && ctlr->type != i210 && ctlr->type != i350)
+ csr32w(ctlr, Fct, 0x8808); + csr32w(ctlr, Fcttv, 0x0100); + csr32w(ctlr, Fcrtl, ctlr->fcrtl); + csr32w(ctlr, Fcrth, ctlr->fcrth); + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Eitr, 128<<2); /* 128 ¼ microsecond intervals */ + return 0; +} + +static int +didtype(int d) +{ + switch(d){ + case 0x1096: + case 0x10ba: /* “gilgal” */ + case 0x1098: /* serdes; not seen */ + case 0x10bb: /* serdes */ + return i82563; + case 0x1049: /* mm */ + case 0x104a: /* dm */ + case 0x104b: /* dc */ + case 0x104d: /* v “ninevah” */ + case 0x10bd: /* dm-2 */ + case 0x294c: /* ich 9 */ + return i82566; + case 0x10de: /* lm ich10d */ + case 0x10df: /* lf ich10 */ + case 0x10e5: /* lm ich9 */ + case 0x10f5: /* lm ich9m; “boazman” */ + return i82567; + case 0x10bf: /* lf ich9m */ + case 0x10cb: /* v ich9m */ + case 0x10cd: /* lf ich10 */ + case 0x10ce: /* v ich10 */ + case 0x10cc: /* lm ich10 */ + return i82567m; + case 0x105e: /* eb */ + case 0x105f: /* eb */ + case 0x1060: /* eb */ + case 0x10a4: /* eb */ + case 0x10a5: /* eb fiber */ + case 0x10bc: /* eb */ + case 0x10d9: /* eb serdes */ + case 0x10da: /* eb serdes “ophir” */ + return i82571; + case 0x107d: /* eb copper */ + case 0x107e: /* ei fiber */ + case 0x107f: /* ei */ + case 0x10b9: /* ei “rimon” */ + return i82572; + case 0x108b: /* e “vidalia” */ + case 0x108c: /* e (iamt) */ + case 0x109a: /* l “tekoa” */ + return i82573; + case 0x10d3: /* l or it; “hartwell” */ + return i82574; + case 0x10a7: + case 0x10a9: /* fiber/serdes */ + return i82575; + case 0x10c9: /* copper */ + case 0x10e6: /* fiber */ + case 0x10e7: /* serdes; “kawela” */ + case 0x150d: /* backplane */ + return i82576; + case 0x10ea: /* lc “calpella”; aka pch lan */ + return i82577; + case 0x10eb: /* lm “calpella” */ + return i82577m; + case 0x10ef: /* dc “piketon” */ + return i82578; + case 0x1502: /* lm */ + case 0x1503: /* v “lewisville” */ + return i82579; + case 0x10f0: /* dm “king's creek” */ + return i82578m; + case 0x150e: /* “barton hills” */ + 
case 0x150f: /* fiber */ + case 0x1510: /* backplane */ + case 0x1511: /* sfp */ + case 0x1516: + return i82580; + case 0x1506: /* v */ + return i82583; + case 0x1533: /* i210-t1 */ + case 0x1534: + case 0x1536: /* fiber */ + case 0x1538: + case 0x1539: /* i211 */ + case 0x153a: /* i217-lm */ + case 0x153b: /* i217-v */ + return i210; + case 0x151f: /* “powerville” eeprom-less */ + case 0x1521: /* copper */ + case 0x1522: /* fiber */ + case 0x1523: /* serdes */ + case 0x1524: /* sgmii */ + return i350; + } + return -1; +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static void +i82563pci(void) +{ + int type; + Ctlr *c, **cc; + Pcidev *p; + + cc = &i82563ctlr; + for(p = nil; p = pcimatch(p, 0x8086, 0);){ + hbafixup(p); + if((type = didtype(p->did)) == -1) + continue; + c = ialloc(sizeof *c, 0); + c->type = type; + c->pcidev = p; + c->rbsz = cttab[type].mtu; + c->port = p->mem[0].bar & ~0x0F; + *cc = c; + cc = &c->next; + } +} + +static int +setup(Ctlr *ctlr) +{ + Pcidev *p; + + if((ctlr->pool = newpool()) == -1){ + print("%s: no pool\n", cname(ctlr)); + return -1; + } + p = ctlr->pcidev; + ctlr->nic = vmap(ctlr->port, p->mem[0].size); + if(ctlr->nic == nil){ + print("%s: can't map %#P\n", cname(ctlr), ctlr->port); + return -1; + } + if(reset(ctlr)){ + vunmap(ctlr->nic, p->mem[0].size); + return -1; + } + pcisetbme(ctlr->pcidev); + return 0; +} + +static void +i82563transmit(Ether*) +{ +} + +int +i82563reset(Ether *edev) +{ + int type; + Ctlr *ctlr; + static int done; + + type = -1; + if(!done) { + i82563pci(); + done = 1; + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = i82563ctlr; ; ctlr = ctlr->next){ + if(ctlr == nil) + return -1; + if(ctlr->active) + continue; + if(type != -1 && ctlr->type != type) + continue; + if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){ + ctlr->active = 1; + memmove(ctlr->ra, edev->ea, Eaddrlen); + if(setup(ctlr) == 0) + break; + } + } + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->ifc.maxmtu = ctlr->rbsz; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = i82563attach; + edev->interrupt = i82563interrupt; + edev->transmit = i82563transmit; + +// edev->arg = edev; + + return 0; +} --- /sys/src/fs/amd64/etherif.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/etherif.h Tue Aug 27 16:10:53 2013 @@ -0,0 +1,56 @@ +typedef struct Ether Ether; +typedef struct Etherctlr Etherctlr; + +struct Ether { + Pciconf; /* hardware info */ + + int ctlrno; + char iname[NAMELEN]; + char oname[NAMELEN]; + int tbdf; /* type+busno+devno+funcno */ + int mbps; /* Mbps */ + uchar ea[Easize]; + + void (*attach)(Ether*); /* filled in by reset routine */ + void (*transmit)(Ether*); + void (*interrupt)(Ureg*, void*); + void *ctlr; + + Ifc ifc; + + Lock rqlock; + Msgbuf *rqhead; + Msgbuf *rqtail; + Rendez rqr; + + Lock tqlock; + Msgbuf *tqhead; + Msgbuf *tqtail; + Rendez tqr; +}; + +struct Etherctlr{ + char *type; + int (*reset)(Ether*); +}; + +extern Etherctlr etherctlr[]; +extern int netherctlr; +extern Ether etherif[MaxEther]; +extern int nether; + +void etheriq(Ether*, Msgbuf*); +Msgbuf *etheroq(Ether*); +int ethercfgmatch(Ether*, Pcidev*, uintmem); + +int etherga620reset(Ether*); +int ether21140reset(Ether*); +int etherelnk3reset(Ether*); +int etheri82557reset(Ether*); +int igbepnp(Ether*); +int dp83815reset(Ether*); +int dp83820pnp(Ether*); +int rtl8139pnp(Ether*); +int rtl8169pnp(Ether*); +int i82563reset(Ether*); +int m10gpnp(Ether*); --- 
/sys/src/fs/amd64/kbd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/kbd.c Tue Aug 27 16:10:53 2013 @@ -0,0 +1,332 @@ +#include "all.h" +#include "io.h" +#include "ureg.h" + +enum { + Data= 0x60, /* data port */ + + Status= 0x64, /* status port */ + Inready= 0x01, /* input character ready */ + Outbusy= 0x02, /* output busy */ + Sysflag= 0x04, /* system flag */ + Cmddata= 0x08, /* cmd==0, data==1 */ + Inhibit= 0x10, /* keyboard/mouse inhibited */ + Minready= 0x20, /* mouse character ready */ + Rtimeout= 0x40, /* general timeout */ + Parity= 0x80, + + Cmd= 0x64, /* command port (write only) */ + + CTdata= 0x0, /* chips & Technologies ps2 data port */ + CTstatus= 0x1, /* chips & Technologies ps2 status port */ + Enable= 1<<7, + Clear= 1<<6, + Error= 1<<5, + Intenable= 1<<4, + Reset= 1<<3, + Tready= 1<<2, + Rready= 1<<1, + Idle= 1<<0, + + Spec= 0x80, + + PF= Spec|0x20, /* num pad function key */ + View= Spec|0x00, /* view (shift window up) */ + KF= Spec|0x40, /* function key */ + Shift= Spec|0x60, + Break= Spec|0x61, + Ctrl= Spec|0x62, + Latin= Spec|0x63, + Caps= Spec|0x64, + Num= Spec|0x65, + Middle= Spec|0x66, + No= 0x00, /* peter */ + + Home= KF|13, + Up= KF|14, + Pgup= KF|15, + Print= KF|16, + Left= View, + Right= View, + End= '\r', + Down= View, + Pgdown= View, + Ins= KF|20, + Del= 0x7F, + + Rbutton=4, + Mbutton=2, + Lbutton=1, +}; + +uchar kbtab[] = +{ +[0x00] No, 0x1b, '1', '2', '3', '4', '5', '6', +[0x08] '7', '8', '9', '0', '-', '=', '\b', '\t', +[0x10] 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +[0x18] 'o', 'p', '[', ']', '\n', Ctrl, 'a', 's', +[0x20] 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', +[0x28] '\'', '`', Shift, '\\', 'z', 'x', 'c', 'v', +[0x30] 'b', 'n', 'm', ',', '.', '/', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, KF|12, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', Del, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +}; + +uchar 
kbtabshift[] = +{ +[0x00] No, 0x1b, '!', '@', '#', '$', '%', '^', +[0x08] '&', '*', '(', ')', '_', '+', '\b', '\t', +[0x10] 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +[0x18] 'O', 'P', '{', '}', '\n', Ctrl, 'A', 'S', +[0x20] 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +[0x28] '"', '~', Shift, '|', 'Z', 'X', 'C', 'V', +[0x30] 'B', 'N', 'M', '<', '>', '?', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, KF|12, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +}; + +uchar kbtabesc1[] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Latin, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +}; + +static uchar ccc; +static int shift; + +enum +{ + /* controller command byte */ + Cscs1= (1<<6), /* scan code set 1 */ + Cmousedis= (1<<5), /* mouse disable */ + Ckbddis= (1<<4), /* kbd disable */ + Csf= (1<<2), /* system flag */ + Cmouseint= (1<<1), /* mouse interrupt enable */ + Ckbdint= (1<<0), /* kbd interrupt enable */ +}; + +/* + * wait for output no longer busy + */ +static int +outready(void) +{ + int tries; + + for(tries = 0; (inb(Status) & Outbusy); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * wait for input + */ +static int +inready(void) +{ + int tries; + + for(tries = 0; !(inb(Status) & Inready); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * ask 8042 to enable the use of address bit 20 + */ +void +i8042a20(void) +{ + outready(); + 
outb(Cmd, 0xD1); + outready(); + outb(Data, 0xDF); + outready(); +} + +/* + * ask 8042 to reset the machine + */ +void +i8042reset(void) +{ + ushort *s = (ushort*)(KZERO+0x472); + + *s = 0x1234; /* BIOS warm-boot flag */ + + outready(); + outb(Cmd, 0xFE); /* pulse reset line (means resend on AT&T machines) */ + outready(); +} + +/* + * keyboard processing: poll the controller for one scan code; returns the translated character, or -1 when no code is ready or the code produced no character (prefix, key release, modifier, unknown key) + */ +int +kbdintr0(void) +{ + int s, c; + static int esc1, esc2; + static int caps; + static int ctl; + static int num; + int keyup; + + /* + * get status + */ + s = inb(Status); + if(!(s&Inready)) + return -1; + + /* + * get the character + */ + c = inb(Data); + + /* + * e0's is the first of a 2 character sequence + */ + if(c == 0xe0){ + esc1 = 1; + return -1; + } else if(c == 0xe1){ + esc2 = 2; + return -1; + } + + keyup = c&0x80; + c &= 0x7f; + if(c >= sizeof kbtab){ /* kbtab, kbtabshift and kbtabesc1 each have sizeof kbtab entries, so c == sizeof kbtab is already out of range */ + print("unknown key %ux\n", c|keyup); + return -1; + } + + if(esc1){ + c = kbtabesc1[c]; + esc1 = 0; + } else if(esc2){ + esc2--; + return -1; + } else if(shift) + c = kbtabshift[c]; + else + c = kbtab[c]; + + if(caps && c<='z' && c>='a') + c += 'A' - 'a'; + + /* + * keyup only important for shifts + */ + if(keyup){ + switch(c){ + case Shift: + shift = 0; + break; + case Ctrl: + ctl = 0; + break; + } + return -1; + } + + /* + * normal character + */ + if(!(c & Spec)){ + if(ctl) + c &= 0x1f; + return c; + } else { + switch(c){ + case Caps: + caps ^= 1; + return -1; + case Num: + num ^= 1; + return -1; + case Shift: + shift = 1; + return -1; + case Ctrl: + ctl = 1; + return -1; + } + } + return -1; +} + +static void +kbdintr(Ureg *ur, void *v) +{ + int c; + + USED(ur, v); + if((c = kbdintr0()) >= 0) + kbdchar(c); +} + +int +kbdgetc(void) +{ + int c; + + if((c = kbdintr0()) < 0) + return 0; + return c; +} + +void +kbdinit(void) +{ + int c; + + intrenable(IrqKBD, kbdintr, 0, BUSUNKNOWN, "kbd"); + + /* wait for a quiescent controller */ + while((c = inb(Status)) & (Outbusy | Inready)) + if(c & Inready) + inb(Data); + + /* get current controller command
byte */ + outb(Cmd, 0x20); + if(inready() < 0){ + print("kbdinit: can't read ccc\n"); + ccc = 0; + } else + ccc = inb(Data); + + /* enable kbd xfers and interrupts */ + ccc &= ~Ckbddis; + ccc |= Csf | Ckbdint | Cscs1; + if(outready() < 0) + print("kbd init failed\n"); + outb(Cmd, 0x60); + if(outready() < 0) + print("kbd init failed\n"); + outb(Data, ccc); + outready(); +} --- /sys/src/fs/amd64/toy.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/toy.c Tue Aug 27 16:10:54 2013 @@ -0,0 +1,136 @@ +#include "all.h" +#include "io.h" + +enum { + Paddr= 0x70, /* address port */ + Pdata= 0x71, /* data port */ + + Seconds= 0x00, + Minutes= 0x02, + Hours= 0x04, + Mday= 0x07, + Month= 0x08, + Year= 0x09, + Status= 0x0A, + + Nbcd= 6, +}; + +#define GETBCD(o) ((bcdclock[o]&0xf) + 10*(bcdclock[o]>>4)) +#define PUTBCD(n,o) bcdclock[o] = (n % 10) | (((n / 10) % 10)<<4) + +static Lock rtclock; + +void +setrtc(Timet secs) +{ + Rtc rtc; + uchar bcdclock[Nbcd]; + + sec2rtc(secs, &rtc); + + PUTBCD(rtc.sec, 0); + PUTBCD(rtc.min, 1); + PUTBCD(rtc.hour, 2); + PUTBCD(rtc.mday, 3); + PUTBCD(rtc.mon, 4); + PUTBCD(rtc.year, 5); + + ilock(&rtclock); + outb(Paddr, Seconds); outb(Pdata, bcdclock[0]); + outb(Paddr, Minutes); outb(Pdata, bcdclock[1]); + outb(Paddr, Hours); outb(Pdata, bcdclock[2]); + outb(Paddr, Mday); outb(Pdata, bcdclock[3]); + outb(Paddr, Month); outb(Pdata, bcdclock[4]); + outb(Paddr, Year); outb(Pdata, bcdclock[5]); + iunlock(&rtclock); +} + +static ulong +_rtctime(void) +{ + uchar bcdclock[Nbcd]; + Rtc rtc; + int i; + + /* don't do the read until the clock is no longer busy */ + for(i = 0; i < 10000; i++){ + outb(Paddr, Status); + if(inb(Pdata) & 0x80) + continue; + + /* read clock values */ + outb(Paddr, Seconds); bcdclock[0] = inb(Pdata); + outb(Paddr, Minutes); bcdclock[1] = inb(Pdata); + outb(Paddr, Hours); bcdclock[2] = inb(Pdata); + outb(Paddr, Mday); bcdclock[3] = inb(Pdata); + outb(Paddr, Month); bcdclock[4] = inb(Pdata); + outb(Paddr, Year); bcdclock[5] = inb(Pdata); 
+ + outb(Paddr, Status); + if((inb(Pdata) & 0x80) == 0) + break; + } + + /* + * convert from BCD + */ + rtc.sec = GETBCD(0); + rtc.min = GETBCD(1); + rtc.hour = GETBCD(2); + rtc.mday = GETBCD(3); + rtc.mon = GETBCD(4); + rtc.year = GETBCD(5); + + /* + * the world starts jan 1 1970 + */ + if(rtc.year < 70) + rtc.year += 2000; + else + rtc.year += 1900; + return rtc2sec(&rtc); +} + +Timet +rtctime(void) +{ + int i; + Timet t, ot; + + ilock(&rtclock); + + /* loop till we get two reads in a row the same */ + t = _rtctime(); + for(i = 0; i < 100; i++){ + ot = t; + t = _rtctime(); + if(ot == t) + break; + } + iunlock(&rtclock); + + return t; +} + +uchar +nvramread(int addr) +{ + uchar data; + + ilock(&rtclock); + outb(Paddr, addr); + data = inb(Pdata); + iunlock(&rtclock); + + return data; +} + +void +nvramwrite(int addr, uchar data) +{ + ilock(&rtclock); + outb(Paddr, addr); + outb(Pdata, data); + iunlock(&rtclock); +} --- /sys/src/fs/amd64/malloc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/malloc.c Tue Aug 27 16:10:54 2013 @@ -0,0 +1,195 @@ +#include "all.h" +#include "io.h" + +uint niob; +uint nhiob; +Hiob *hiob; + +/* + * this ugliness may be fixed by moving INIMAP to KSEG2 + */ +void* +kaddr(uintmem pa) +{ + uchar *a; + + a = UINT2PTR(pa); + if(pa < INIMAP) + return a+KZERO; + if(pa < KSEG2) + return a+KSEG2; + panic("kaddr: bad pa %#P\n", pa); + return 0; +} + +uintmem +paddr(void *va) +{ + uintmem a; + + a = PTR2UINT(va); + if(a >= KZERO && a < KZERO+INIMAP) + return a-KZERO; + if(a >= KSEG2) + return a-KSEG2; + panic("paddr: va %#p pa %#P @ %#p", va, mmuphysaddr(a), getcallerpc(&va)); + return 0; +} + +void +prbanks(void) +{ + Mbank *b; + int m; + + for(m = 0; m < mconf.nbank; m++){ + b = mconf.bank+m; + print("bank[%d]: base %#p, limit %#p\n", m, b->base, b->limit); + } +} + + +/* + * Called to allocate permanent data structures + * Alignment is in number of bytes. It pertains both to the start and + * end of the allocated memory. 
+ */ +void* +ialloc(uintptr n, int align) +{ + Mbank *b; + uintmem p; + int m; + + ilock(&mconf); + for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++){ + p = b->base; + + if(align <= 0) + align = sizeof(uintptr); + if(m = n % align) + n += align - m; + if(m = p % align) + p += align - m; + + if(p+n > b->limit) + continue; + + b->base = p+n; + iunlock(&mconf); + + /* this is really ugly */ + if(b == mconf.bank){ + memset((void*)(p+KZERO), 0, n); + return (void*)(p+KZERO); + } + memset((void*)(p+KSEG2), 0, n); + return (void*)(p+KSEG2); + } + + iunlock(&mconf); + + prbanks(); + panic("ialloc(%p, %d): out of memory: %#p nbank=%d\n", n, align, getcallerpc(&n), mconf.nbank); + return 0; +} + +static void +cmd_memory(int, char *[]) +{ + prbanks(); +} + +/* + * allocate rest of mem + * for io buffers. + */ +#define HWIDTH 8 /* buffers per hash */ +void +iobufinit(void) +{ + long i; + uintptr m, v; + Iobuf *p, *q; + Hiob *hp; + Mbank *b; + + wlock(&mainlock); /* init */ + wunlock(&mainlock); + + m = 0; + for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++) + m += b->limit - b->base; + + m -= conf.sparemem; + + niob = m / (sizeof(Iobuf) + RBUFSIZE + sizeof(Hiob)/HWIDTH); + nhiob = niob / HWIDTH; + while(!prime(nhiob)) + nhiob++; + print(" %d buffers; %d hashes\n", niob, nhiob); + hiob = ialloc(nhiob * sizeof(Hiob), 0); + hp = hiob; + for(i=0; inamebuf, sizeof hp->namebuf, "hiob%uld\n", i); + hp->name = hp->namebuf; + qlock(hp); + qunlock(hp); + hp++; + } + p = ialloc(niob * sizeof(Iobuf), 0); + hp = hiob; + for(i=0; iname = "buf"; + snprint(p->namebuf, sizeof p->namebuf, "buf%uld", i); + p->name = p->namebuf; + qlock(p); + qunlock(p); + if(hp == hiob) + hp = hiob + nhiob; + hp--; + q = hp->link; + if(q) { + p->fore = q; + p->back = q->back; + q->back = p; + p->back->fore = p; + } else { + hp->link = p; + p->fore = p; + p->back = p; + } + p->dev = devnone; + p->addr = -1; + p->xiobuf = ialloc(RBUFSIZE, RBUFSIZE); + p->iobuf = (char*)-1; + p++; + } + + /* + * Make sure that 
no more of bank[0] can be used: + * 'check' will do an ialloc(0, 1) to find the base of + * sparemem. + */ + if(mconf.bank[0].limit < 1024*1024) + mconf.bank[0].base = mconf.bank[0].limit+1; + + v = 0; + for(b = mconf.bank; b < mconf.bank+mconf.nbank; b++) + v += b->limit - b->base; + print(" mem left = %,llud, out of %,llud\n", v, conf.mem); + /* paranoia: add this command as late as is easy */ + cmd_install("memory", "-- print ranges of memory banks", cmd_memory); +} + +void* +iobufmap(Iobuf *p) +{ + return p->iobuf = p->xiobuf; +} + +void +iobufunmap(Iobuf *p) +{ + p->iobuf = (char*)-1; +} --- /sys/src/fs/amd64/pci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/pci.c Tue Aug 27 16:10:55 2013 @@ -0,0 +1,772 @@ +/* pci */ +#include "all.h" +#include "io.h" + +enum { /* configuration mechanism #1 */ + PciADDR = 0xcf8, + PciDATA = 0xcfc, + + Maxfn = 7, + Maxdev = 31, + Maxbus = 255, + + /* command register */ + IOen = 1<<0, + MEMen = 1<<1, + MASen = 1<<2, + MemWrInv = 1<<4, + PErrEn = 1<<6, + SErrEn = 1<<8, + + Write, + Read, +}; + +static Lock pcicfglock; +static Lock pcicfginitlock; +static int pcicfgmode = -1; +static Pcidev* pciroot; +static Pcidev* pcilist; +static Pcidev* pcitail; + +static int pcicfgrw(int, int, int, int, int); + +u32int +pcibarsize(Pcidev *p, int rno) +{ + u32int v, size; + + v = pcicfgr32(p, rno); + pcicfgw32(p, rno, 0xFFFFFFF0); + size = pcicfgr32(p, rno); + if(v & 1) + size |= 0xFFFF0000; + pcicfgw32(p, rno, v); + + return -(size & ~0x0F); +} + +static void +cmd_pcihinv(int argc, char *argv[]) +{ + int i, flags = 0; + + for (i = 1; i < argc; i++) + if (strcmp(argv[i], "-v") == 0) + flags |= 1; + else { + print("unknown pcihinv option %s; options are: -v\n", argv[i]); + return; + } + pcihinv(nil, flags); /* print the whole device tree */ +} + +static u32int +pcimask(u32int v) +{ + u32int m; + + m = 8*sizeof(v); + for(m = 1<<(m-1); m != 0; m >>= 1) { + if(m & v) + break; + } + + m--; + if((v & m) == 0) + return v; + + v |= m; + return v+1; 
+} + +static int +pcilscan(int bno, Pcidev** list) +{ + Pcidev *p, *head, *tail; + int dno, fno, i, hdt, l, maxfno, maxubn, sbn, tbdf, ubn; + + maxubn = bno; + head = nil; + tail = nil; + for(dno = 0; dno <= Maxdev; dno++){ + maxfno = 0; + for(fno = 0; fno <= maxfno; fno++){ + /* + * For this possible device, form the + * bus+device+function triplet needed to address it + * and try to read the vendor and device ID. + * If successful, allocate a device struct and + * start to fill it in with some useful information + * from the device's configuration space. + */ + tbdf = MKBUS(BusPCI, bno, dno, fno); + l = pcicfgrw(tbdf, PciVID, 0, Read, 4); + if(l == 0xFFFFFFFF || l == 0) + continue; + p = ialloc(sizeof *p, 0); + p->tbdf = tbdf; + p->vid = l; + p->did = l>>16; + + if(pcilist != nil) + pcitail->list = p; + else + pcilist = p; + pcitail = p; + + p->pcr = pcicfgr16(p, PciPCR); + p->rid = pcicfgr8(p, PciRID); + p->ccrp = pcicfgr8(p, PciCCRp); + p->ccru = pcicfgr8(p, PciCCRu); + p->ccrb = pcicfgr8(p, PciCCRb); + p->cls = pcicfgr8(p, PciCLS); + p->ltr = pcicfgr8(p, PciLTR); + + p->intl = pcicfgr8(p, PciINTL); + + /* + * If the device is a multi-function device adjust the + * loop count so all possible functions are checked. + */ + hdt = pcicfgr8(p, PciHDT); + if(hdt & 0x80) + maxfno = Maxfn; + + /* + * If appropriate, read the base address registers + * and work out the sizes. + */ + switch(p->ccrb) { + default: + if((hdt & 0x7F) != 0) + break; + for(i = 0; i < nelem(p->mem); i++) { + p->mem[i].bar = (u32int)pcicfgr32(p, PciBAR0+4*i); + p->mem[i].size = pcibarsize(p, PciBAR0+4*i); + } + break; + + case 0x00: + case 0x05: /* memory controller */ + case 0x06: /* bridge device */ + break; + } + + if(head != nil) + tail->link = p; + else + head = p; + tail = p; + } + } + + *list = head; + for(p = head; p != nil; p = p->link){ + /* + * Find PCI-PCI bridges and recursively descend the tree. 
+ */ + if(p->ccrb != 0x06 || p->ccru != 0x04) + continue; + + /* + * If the secondary or subordinate bus number is not + * initialised try to do what the PCI BIOS should have + * done and fill in the numbers as the tree is descended. + * On the way down the subordinate bus number is set to + * the maximum as it's not known how many buses are behind + * this one; the final value is set on the way back up. + */ + sbn = pcicfgr8(p, PciSBN); + ubn = pcicfgr8(p, PciUBN); + + if(sbn == 0 || ubn == 0) { + print("%τ: unconfigured bridge\n", p->tbdf); + + sbn = maxubn+1; + /* + * Make sure memory, I/O and master enables are + * off, set the primary, secondary and subordinate + * bus numbers and clear the secondary status before + * attempting to scan the secondary bus. + * + * Initialisation of the bridge should be done here. + */ + pcicfgw32(p, PciPCR, 0xFFFF0000); + pcicfgw32(p, PciPBN, Maxbus<<16 | sbn<<8 | bno); + pcicfgw16(p, PciSPSR, 0xFFFF); + maxubn = pcilscan(sbn, &p->bridge); + pcicfgw32(p, PciPBN, maxubn<<16 | sbn<<8 | bno); + } + else { + if(ubn > maxubn) + maxubn = ubn; + pcilscan(sbn, &p->bridge); + } + } + + return maxubn; +} + +static uchar +pIIxget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 0x60, 0x61, 0x62, 0x63 */ + pirq = pcicfgr8(router, link); + return (pirq < 16)? pirq: 0; +} + +static void +pIIxset(Pcidev *router, uchar link, uchar irq) +{ + pcicfgw8(router, link, irq); +} + +static uchar +viaget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 1, 2, 3, 5 */ + pirq = (link < 6)? pcicfgr8(router, 0x55 + (link>>1)): 0; + + return (link & 1)? (pirq >> 4): (pirq & 15); +} + +static void +viaset(Pcidev *router, uchar link, uchar irq) +{ + uchar pirq; + + pirq = pcicfgr8(router, 0x55 + (link >> 1)); + pirq &= (link & 1)? 0x0f: 0xf0; + pirq |= (link & 1)? 
(irq << 4): (irq & 15); + pcicfgw8(router, 0x55 + (link>>1), pirq); +} + +typedef struct Bridge Bridge; +struct Bridge +{ + ushort vid; + ushort did; + uchar (*get)(Pcidev *, uchar); + void (*set)(Pcidev *, uchar, uchar); +}; + +static Bridge southbridges[] = { + { 0x8086, 0xffff, pIIxget, pIIxset }, /* Intel * */ + + { 0x1002, 0xffff, nil, nil }, /* ati (amd) */ + { 0x1022, 0xffff, nil, nil }, /* amd */ + { 0x10de, 0x00d1, nil, nil }, /* NVIDIA nForce 3 */ + { 0x1106, 0x3227, viaget, viaset }, /* Viatech VT8237 */ + { 0x1166, 0x0200, nil, nil }, /* ServerWorks ServerSet III LE */ +}; + +typedef struct Slot Slot; +struct Slot { + uchar bus; /* Pci bus number */ + uchar dev; /* Pci device number */ + uchar maps[12]; /* Avoid structs! Link and mask. */ + uchar slot; /* Add-in/built-in slot */ + uchar reserved; +}; + +typedef struct Router Router; +struct Router { + uchar signature[4]; /* Routing table signature */ + uchar version[2]; /* Version number */ + uchar size[2]; /* Total table size */ + uchar bus; /* Interrupt router bus number */ + uchar devfn; /* Router's devfunc */ + uchar pciirqs[2]; /* Exclusive PCI irqs */ + uchar compat[4]; /* Compatible PCI interrupt router */ + uchar miniport[4]; /* Miniport data */ + uchar reserved[11]; + uchar checksum; +}; + +#pragma varargck type "τ" int + +static int +τfmt(Fmt* fmt) +{ + char buf[32], *p, *e; + int type, tbdf; + + p = buf; + e = buf+sizeof buf; + tbdf = va_arg(fmt->args, int); + if(tbdf == -1) + return fmtstrcpy(fmt, "unk"); + type = BUSTYPE(tbdf); + if(type == 12) + p = seprint(p, e, "pci"); + else + p = seprint(p, e, "%d", type); + seprint(p, e, ".%d.%d.%d", + BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + return fmtstrcpy(fmt, buf); +} + +static void +pcirouting(void) +{ + uchar *p, pin, irq, link, *map; + int size, i, fn, tbdf; + Bridge *southbridge; + Pcidev *sbpci, *pci; + Slot *e; + Router *r; + + /* Search for PCI interrupt routing table in BIOS */ + for(p = (uchar*)KADDR(0xf0000); p < 
(uchar*)KADDR(0xfffff); p += 16) + if(p[0] == '$' && p[1] == 'P' && p[2] == 'I' && p[3] == 'R') + break; + + if(p >= (uchar *)KADDR(0xfffff)) + return; + + r = (Router *)p; + + fmtinstall(L'τ', τfmt); + if(0) + print("PCI interrupt routing table version %d.%d at %.6llux\n", + r->version[0], r->version[1], (uintptr)r & 0xfffff); + + tbdf = (BusPCI << 24)|(r->bus << 16)|(r->devfn << 8); + sbpci = pcimatchtbdf(tbdf); + if(sbpci == nil) { + print("pcirouting: Cannot find south bridge %τ\n", tbdf); + return; + } + + for(i = 0; i != nelem(southbridges); i++) + if(sbpci->vid == southbridges[i].vid + && (sbpci->did == southbridges[i].did || southbridges[i].did == 0xffff)) + break; + + if(i == nelem(southbridges)) { + print("pcirouting: ignoring south bridge %τ %.4ux/%.4ux\n", tbdf, sbpci->vid, sbpci->did); + return; + } + southbridge = &southbridges[i]; + if(southbridge->get == nil || southbridge->set == nil) + return; + + size = (r->size[1] << 8)|r->size[0]; + for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) { + if(0){ + print("%.2ux/%.2ux %.2ux: ", e->bus, e->dev, e->slot); + for (i = 0; i != 4; i++) { + uchar *m = &e->maps[i * 3]; + print("[%d] %.2ux %.4ux ", + i, m[0], (m[2] << 8)|m[1]); + } + print("\n"); + } + + for(fn = 0; fn <= Maxfn; fn++) { + tbdf = MKBUS(BusPCI, e->bus, e->dev, fn); + pci = pcimatchtbdf(tbdf); + if(pci == nil) + continue; + pin = pcicfgr8(pci, PciINTP); + if(pin == 0 || pin == 0xff) + continue; + + map = &e->maps[(pin - 1) * 3]; + link = map[0]; + irq = southbridge->get(sbpci, link); + if(irq == 0 || irq == pci->intl) + continue; + if(pci->intl != 0 && pci->intl != 0xFF) { + print("pcirouting: %τ at pin %d link %d irq %d -> %d\n", + tbdf, pin, link, irq, pci->intl); + southbridge->set(sbpci, link, pci->intl); + continue; + } + print("pcirouting: %τ at pin %d link %d irq %d\n", tbdf, pin, link, irq); + pcicfgw8(pci, PciINTL, irq); + pci->intl = irq; + } + } +} + +static void +pcicfginit(void) +{ + int bno, n; + Pcidev **list; + + if(pcicfgmode 
!= -1) + return; + lock(&pcicfginitlock); + if(pcicfgmode != -1){ + unlock(&pcicfginitlock); + return; + } + + cmd_install("pcihinv", "-- pci inventory", cmd_pcihinv); + +// fmtinstall('τ', fmtT); + + /* + * Try to determine if PCI Mode1 configuration implemented. + * (Bits [30:24] of PciADDR must be 0, according to the spec.) + * Mode2 won't appear in 64-bit machines. + */ + n = inl(PciADDR); + if(!(n & 0x7F000000)){ + outl(PciADDR, 0x80000000); + outb(PciADDR+3, 0); + if(inl(PciADDR) & 0x80000000) + pcicfgmode = 1; + } + outl(PciADDR, n); + + if(pcicfgmode < 0){ + unlock(&pcicfginitlock); + return; + } + + list = &pciroot; + for(bno = 0; bno <= Maxbus; bno++) { + bno = pcilscan(bno, list); + while(*list) + list = &(*list)->link; + } + pcirouting(); +// pcireservemem(); + unlock(&pcicfginitlock); + + if(getconf("*pcihinv")) + pcihinv(pciroot, 1); +} + +static int +pcicfgrw(int tbdf, int r, int data, int rw, int w) +{ + int o, x, er; + + if(pcicfgmode == -1) + pcicfginit(); + if(pcicfgmode != 1) + return -1; + if(BUSDNO(tbdf) > Maxdev) + return -1; + + lock(&pcicfglock); + o = r & 4-w; + er = r&0xfc | (r & 0xf00)<<16; + outl(PciADDR, 0x80000000|BUSBDF(tbdf)|er); + if(rw == Read){ + x = -1; + switch(w){ + case 1: + x = inb(PciDATA+o); + break; + case 2: + x = ins(PciDATA+o); + break; + case 4: + x = inl(PciDATA+o); + break; + } + }else{ + x = 0; + switch(w){ + case 1: + outb(PciDATA+o, data); + break; + case 2: + outs(PciDATA+o, data); + break; + case 4: + outl(PciDATA+o, data); + break; + } + } +// outl(PciADDR, 0); + unlock(&pcicfglock); + + return x; +} + +int +pcicfgr8(Pcidev *p, int rno) +{ + return pcicfgrw(p->tbdf, rno, 0, Read, 1); +} + +void +pcicfgw8(Pcidev *p, int rno, int data) +{ + pcicfgrw(p->tbdf, rno, data, Write, 1); +} + +int +pcicfgr16(Pcidev *p, int rno) +{ + return pcicfgrw(p->tbdf, rno, 0, Read, 2); +} + +void +pcicfgw16(Pcidev *p, int rno, int data) +{ + pcicfgrw(p->tbdf, rno, data, Write, 2); +} + +int +pcicfgr32(Pcidev *p, int rno) +{ + 
return pcicfgrw(p->tbdf, rno, 0, Read, 4); +} + +void +pcicfgw32(Pcidev *p, int rno, int data) +{ + pcicfgrw(p->tbdf, rno, data, Write, 4); +} + +void +pciclrmwi(Pcidev* p) +{ + p->pcr &= ~MemWrInv; + pcicfgw16(p, PciPCR, p->pcr); +} + + +Pcidev* +pcimatch(Pcidev* prev, int vid, int did) +{ + if(pcicfgmode == -1) + pcicfginit(); + + prev = prev? prev->list: pcilist; + for(; prev != nil; prev = prev->list){ + if((vid == 0 || prev->vid == vid) + && (did == 0 || prev->did == did)) + break; + } + return prev; +} + +Pcidev* +pcimatchtbdf(int tbdf) +{ + Pcidev *p; + + if(pcicfgmode == -1) + pcicfginit(); + + for(p = nil; p = pcimatch(p, 0, 0); ) + if(p->tbdf == tbdf) + break; + return p; +} + +static char * +ccru2name(int ccru) +{ + switch (ccru>>8) { + case 0x01: /* mass storage controller */ + return "disks"; + case 0x02: /* network controller */ + return "net"; /* probably ether */ + case 0x03: /* display controller */ + return "video"; + case 0x04: /* multimedia device */ + return "audio"; + case 0x07: /* simple communication controllers */ + return "serial"; + case 0x08: /* base system peripherals */ + return "basic"; + case 0x09: /* input devices */ + return "input"; + case 0x0A: /* docking stations */ + return "dock"; + case 0x0B: /* processors */ + return "cpu"; + case 0x0C: /* serial bus controllers */ + return "usb"; + case 0x00: + return "memct0"; + case 0x05: /* memory controller */ + return "memctl"; + case 0x06: /* bridge device */ + return "bridge"; + default: + return "*gok*"; + } +} + +static char * +vid2name(int vid) +{ + switch (vid) { + case 0x1000: + return "ncr"; + case 0x1002: + return "ati"; + case 0x100b: + return "natsemi"; + case 0x1011: + return "dec"; + case 0x1013: + return "cirrus"; + case 0x1022: + return "amd"; + case 0x1023: + return "cyber?"; + case 0x102b: + return "matrox"; + case 0x102c: + return "hiq"; + case 0x1039: + return "sis"; + case 0x104b: + return "mylex"; + case 0x105a: + return "promise"; + case 0x105d: + return 
"number9"; + case 0x10a9: + return "sgi"; + case 0x10b7: + return "3com"; + case 0x10c8: + return "neomagic"; /* or magicgraph */ + case 0x10de: + return "nvidia"; + case 0x10ec: + return "realtek"; + case 0x11ab: + return "marvell"; + case 0x11ad: + return "(pnic?)"; + case 0x121a: + return "voodoo"; + case 0x12ae: + return "alteon"; + case 0x1385: + return "netgear"; + case 0x14c1: + return "myri"; + case 0x15ad: + return "vmware"; + case 0x16ec: + return "usrobot"; + case 0x5333: /* "S" "3". har, har. */ + return "s3"; + case 0x8086: + return "intel"; + default: + return "*GOK*"; + } +} + +void +pcihinv(Pcidev* p, uint flags) +{ + int i; + Pcidev *t; + + if(p == nil) { + p = pciroot; + print("bus dev type "); + if (flags) + print("%7s", ""); + print("vid "); + if (flags) + print("%8s", ""); + print("did intl memory\n"); + } + for(t = p; t != nil; t = t->link) { + print("%d.%2d.%d %.4ux", BUSBNO(t->tbdf), BUSDNO(t->tbdf), + BUSFNO(t->tbdf), t->ccru); + if (flags) + print(" %-6s", ccru2name(t->ccru)); + print(" %.4ux", t->vid); + if (flags) + print(" %-7s", vid2name(t->vid)); + print(" %.4ux %2d ", t->did, t->intl); + + for(i = 0; i < nelem(p->mem); i++) { + if(t->mem[i].size == 0) + continue; + print("%d:%#P %d ", i, + t->mem[i].bar, t->mem[i].size); + } + if(t->bridge) + print("->%d", BUSBNO(t->bridge->tbdf)); + print("\n"); + } + while(p != nil) { + if(p->bridge != nil) + pcihinv(p->bridge, flags); + p = p->link; + } +} + +void +pcireset(void) +{ + Pcidev *p; + + if(pcicfgmode == -1) + pcicfginit(); + for(p = pcilist; p != nil; p = p->list) + pciclrbme(p); +} + +void +pcisetbme(Pcidev* p) +{ + p->pcr |= MASen; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pciclrbme(Pcidev* p) +{ + p->pcr &= ~MASen; + pcicfgw16(p, PciPCR, p->pcr); +} + +int +pcicap(Pcidev *p, int cap) +{ + int i, c, off; + + /* status register bit 4 has capabilities */ + if((pcicfgr16(p, PciPSR) & 1<<4) == 0) + return -1; + switch(pcicfgr8(p, PciHDT) & 0x7f){ + default: + return -1; + case 0: /* 
etc */ + case 1: /* pci to pci bridge */ + off = 0x34; + break; + case 2: /* cardbus bridge */ + off = 0x14; + break; + } + for(i = 48; i--;){ + off = pcicfgr8(p, off); + if(off < 0x40 || (off & 3)) + break; + off &= ~3; + c = pcicfgr8(p, off); + if(c == 0xff) + break; + if(c == cap) + return off; + off++; + } + return -1; +} +
--- /sys/src/fs/amd64/trap.c Thu Jan 1 00:00:00 1970
+++ /sys/src/fs/amd64/trap.c Tue Aug 27 16:10:56 2013
@@ -0,0 +1,539 @@
+#include "all.h"
+#include "ureg.h"
+#include "io.h"
+#include "apic.h"
+
+#define iprint(...)	print(__VA_ARGS__)
+#define up		u
+#define xalloc(x)	ialloc(x, 0)
+
+static void debugbpt(Ureg*, void*);
+static void faultamd64(Ureg*, void*);
+static void doublefault(Ureg*, void*);
+static void unexpected(Ureg*, void*);
+static void expected(Ureg*, void*);
+static void dumpstackwithureg(Ureg*);
+
+/* vctl[vno] heads the chain of handlers for vector vno; vctllock guards it */
+static Lock vctllock;
+/*static*/ Vctl *vctl[256];
+
+/* accumulated interrupt service counts and cycles */
+typedef struct Intrtime Intrtime;
+struct Intrtime {
+	uvlong	count;
+	uvlong	cycles;
+};
+static Intrtime intrtimes[256];		/* indexed by vector */
+static Intrtime machtimes[MACHMAX];	/* indexed by machno */
+
+static int trapinited;
+extern int ioapicintrenable(Vctl*);
+
+/*
+ * Return the affinity recorded in the Vctl that vv points to
+ * (vv is the value returned by intrenable).
+ */
+int
+intraffinity(void *vv)
+{
+	Vctl *v;
+
+	v = vv;
+	return v->affinity;
+}
+
+/*
+ * Install f(ureg, a) as an interrupt handler for irq on device tbdf.
+ * A Vctl is allocated and filled in, ioapicintrenable() picks the
+ * vector, and the Vctl is pushed on the front of vctl[vno].
+ * Returns the Vctl, or nil if f is nil or ioapicintrenable() fails.
+ * Panics if the vector is out of range, or if a handler already on
+ * that vector has a different isr or eoi routine.
+ */
+void*
+intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
+{
+	int vno;
+	Vctl *v;
+
+	if(f == nil){
+		print("intrenable: nil handler for %d, tbdf %#ux for %s\n",
+			irq, tbdf, name);
+		return nil;
+	}
+
+	v = ialloc(sizeof(Vctl), 0);
+	v->isintr = 1;
+	v->irq = irq;
+	v->tbdf = tbdf;
+	v->affinity = -1;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, NAMELEN-1);
+	v->name[NAMELEN-1] = 0;
+
+	ilock(&vctllock);
+	vno = ioapicintrenable(v);
+	if(vno == -1){
+		iunlock(&vctllock);
+		print("intrenable: couldn't enable irq %d, tbdf %#ux for %s\n",
+			irq, tbdf, v->name);
+		/* v is not released: free(v) was left commented out here */
+		return nil;
+	}
+	if(vno >= nelem(vctl))
+		panic("vno: %d\n", vno);
+	if(vctl[vno] != nil){
+		/*
+		 * Compare against the head of the chain we are about to
+		 * join.  Index with vno, not v->vno: this function does
+		 * not assign v->vno until below, so v->vno could select
+		 * a different (possibly nil) slot.
+		 */
+		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
+			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
+				vctl[vno]->name, v->name,
+				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
+	}
+	v->vno = vno;
+	v->next = vctl[vno];
+	vctl[vno] = v;
+	iunlock(&vctllock);
+
+	if(v->mask)
+		v->mask(v, 0);
+
+	/*
+	 * Return the assigned vector so intrdisable can find
+	 * the handler; the IRQ is useless in the wonderful world
+	 * of the IOAPIC.
+	 */
+	return v;
+}
+
+/*
+ * Install f(ureg, a) as the handler for trap vector vno.
+ * Panics if vno is outside 0..255, or if the vector already had a
+ * handler (detected after linking, when v->next is non-nil).
+ */
+void
+trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl *v;
+
+	if(vno < 0 || vno >= 256)
+		panic("trapenable: vno %d", vno);
+	v = xalloc(sizeof(Vctl));
+	v->type = "trap";
+	v->tbdf = BUSUNKNOWN;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, NAMELEN);
+	v->name[NAMELEN-1] = 0;
+
+	ilock(&vctllock);
+	v->next = vctl[vno];
+	vctl[vno] = v;
+	iunlock(&vctllock);
+
+	if(v->next)
+		panic("trapenable: chained trap %d %s from %#p\n",
+			vno, name, getcallerpc(&vno));
+}
+
+/*
+ * Clear the NMI latch and re-enable NMI through ports 0x70 and 0x61.
+ */
+static void
+nmienable(void)
+{
+	int x;
+
+	/*
+	 * Hack: should be locked with NVRAM access.
+	 */
+	outb(0x70, 0x80);		/* NMI latch clear */
+	outb(0x70, 0);
+
+	x = inb(0x61) & 0x07;		/* Enable NMI */
+	outb(0x61, 0x08|x);
+	outb(0x61, x);
+}
+
+void +trapinit(void) +{ + /* + * Special traps. + * Syscall() is called directly without going through trap().
+ */ + trapenable(VectorBPT, debugbpt, 0, "#BP"); + trapenable(VectorPF, faultamd64, 0, "#PF"); + trapenable(Vector2F, doublefault, 0, "#DF"); + trapenable(Vector15, unexpected, 0, "#15"); + trapenable(IdtIPI, expected, 0, "#IPI"); + nmienable(); + + trapinited = 1; +} + +static char* excname[32] = { + "#DE", /* Divide-by-Zero Error */ + "#DB", /* Debug */ + "#NMI", /* Non-Maskable-Interrupt */ + "#BP", /* Breakpoint */ + "#OF", /* Overflow */ + "#BR", /* Bound-Range */ + "#UD", /* Invalid-Opcode */ + "#NM", /* Device-Not-Available */ + "#DF", /* Double-Fault */ + "#9 (reserved)", + "#TS", /* Invalid-TSS */ + "#NP", /* Segment-Not-Present */ + "#SS", /* Stack */ + "#GP", /* General-Protection */ + "#PF", /* Page-Fault */ + "#15 (reserved)", + "#MF", /* x87 FPE-Pending */ + "#AC", /* Alignment-Check */ + "#MC", /* Machine-Check */ + "#XF", /* SIMD Floating-Point */ + "#20 (reserved)", + "#21 (reserved)", + "#22 (reserved)", + "#23 (reserved)", + "#24 (reserved)", + "#25 (reserved)", + "#26 (reserved)", + "#27 (reserved)", + "#28 (reserved)", + "#29 (reserved)", + "#30 (reserved)", + "#31 (reserved)", +}; + +/* + * keep interrupt service times and counts + */ +void +intrtime(int vno) +{ + uvlong diff, x; + Intrtime *t; + + x = rdtsc(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + t = intrtimes + vno; + t->cycles += diff; + t->count++; + t = machtimes + m->machno; + t->cycles += diff; + t->count++; +} + +/* + * All traps come here. It is slower to have all traps call trap() + * rather than directly vectoring the handler. However, this avoids a + * lot of code duplication and possible bugs. The only exception is + * VectorSYSCALL. + * Trap is called with interrupts disabled via interrupt-gates. 
+ */ +void +trap(Ureg* ureg) +{ + int clockintr, vno; + Vctl *ctl, *v; + + if(!trapinited){ + /* fault can give a better error message */ + if(ureg->type == VectorPF) + faultamd64(ureg, nil); + if(ureg->type < nelem(excname)) + panic("trap %llud: %s: not ready %#p", ureg->type, excname[ureg->type], getcr2()); + else + panic("trap %llud: not ready", ureg->type); + } + + m->perf.intrts = rdtsc(); + clockintr = 0; + + vno = ureg->type; + if(ctl = vctl[vno]){ + if(ctl->isintr){ + // m->intr++; + if(vno >= VectorPIC && vno != VectorSYSCALL) + m->lastintr = ctl->irq; + } + + if(ctl->isr) + ctl->isr(vno); + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + if(ctl->eoi) + ctl->eoi(vno); + + if(ctl->isintr){ + intrtime(vno); + + if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) + clockintr = 1; + + // if(up && !clockintr) + // preempted(); + } + } + else if(vno >= VectorPIC && vno != VectorSYSCALL){ + /* + * An unknown interrupt. + * Check for a default IRQ7. This can happen when + * the IRQ input goes away before the acknowledge. + * In this case, a 'default IRQ7' is generated, but + * the corresponding bit in the ISR isn't set. + * In fact, just ignore all such interrupts. + */ + + /* clear the interrupt */ + // i8259isr(vno); +lapiceoi(0); + print("cpu%d: spurious interrupt %d, last %d\n", + m->machno, vno, m->lastintr); + m->spuriousintr++; + intrtime(vno); + return; + } + else{ + if(vno == VectorNMI){ + nmienable(); + if(m->machno != 0){ + iprint("cpu%d: PC %#p\n", + m->machno, ureg->ip); + for(;;); + } + } + dumpregs(ureg); + if(vno < nelem(excname)) + panic("%s", excname[vno]); + panic("unknown trap/intr: %d", vno); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + sched(); + splhi(); + } +} + +void +trapstats(void) +{ +} + +/* + * Fill in enough of Ureg to get a stack trace, and call a function. + * Used by debugging interface rdb. 
+ */ +void +callwithureg(void (*fn)(Ureg*)) +{ + Ureg ureg; + ureg.ip = getcallerpc(&fn); + ureg.sp = PTR2UINT(&fn); + fn(&ureg); +} + +static void +dumpstackwithureg(Ureg* ureg) +{ + char *s; + uintptr l, v, i, estack; + extern ulong etext; /* ahem */ + + if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){ + iprint("dumpstack disabled\n"); + return; + } + iprint("dumpstack\n"); +prflush(); + iprint("ktrace 9%s %#p %#p\n", "iveyfs", ureg->ip, ureg->sp); + i = 0; + if(u != nil + && (uintptr)&l >= (uintptr)u->stack + && (uintptr)&l <= (uintptr)u->stack+MAXSTACK) + estack = (uintptr)u->stack+KSTACK; + /* botch — where's the mach stack!? */ + else if((uintptr)&l >= m->stack && (uintptr)&l <= m->stack+MACHSTKSZ) + estack = m->stack+MACHSTKSZ; + else{ + if(u != nil) + iprint("&u->stack %#p &l %#p\n", u->stack, &l); + else + iprint("&m %#p &l %#p\n", m, &l); + return; + } + iprint("estackx %#p\n", estack); +prflush(); + for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ + v = *(uintptr*)l; + if((KTZERO < v && v < (uintptr)&etext) + || ((uintptr)&l < v && v < estack) || estack-l < 256){ + iprint("%#16.16p=%#16.16p ", l, v); + i++; + } + if(i == 2){ + i = 0; + iprint("\n"); + prflush(); + delay(10); + } + } + if(i) + iprint("\n"); +prflush(); +} + +void +dumpstack(User *) +{ + callwithureg(dumpstackwithureg); +} + +void +dumpstack_(void) +{ + callwithureg(dumpstackwithureg); +} + +static void +debugbpt(Ureg*, void*) +{ + panic("kernel bpt"); +} + +void dumpgpr(Ureg* ureg); +static void +doublefault(Ureg *ureg, void*) +{ + dumpgpr(ureg); + panic("cpu%d: double fault: pc %#p", m->machno, ureg->ip); +} + +static void +unexpected(Ureg* ureg, void*) +{ + iprint("cpu%d: unexpected trap %llud; ignoring\n", m->machno, ureg->type); +} + +static void +expected(Ureg*, void*) +{ +} + +static void +faultamd64(Ureg* ureg, void*) +{ + int pid; + u64int addr; + + addr = getcr2(); + pid = -1; + if(up) + pid = up->pid; + panic("cpu%d: fault with up %d; pc %#p addr %#p\n", 
m->machno, pid, ureg->ip, addr); +} + +/* + * Dump general registers. + */ +void +dumpgpr(Ureg* ureg) +{ + if(up != nil) + iprint("cpu%d: registers for %s %d\n", + m->machno, up->text, up->pid); + else + iprint("cpu%d: registers for kernel\n", m->machno); + + iprint("ax\t%#16.16llux\n", ureg->ax); + iprint("bx\t%#16.16llux\n", ureg->bx); + iprint("cx\t%#16.16llux\n", ureg->cx); + iprint("dx\t%#16.16llux\n", ureg->dx); + iprint("di\t%#16.16llux\n", ureg->di); + iprint("si\t%#16.16llux\n", ureg->si); + iprint("bp\t%#16.16llux\n", ureg->bp); + iprint("r8\t%#16.16llux\n", ureg->r8); + iprint("r9\t%#16.16llux\n", ureg->r9); + iprint("r10\t%#16.16llux\n", ureg->r10); + iprint("r11\t%#16.16llux\n", ureg->r11); + iprint("r12\t%#16.16llux\n", ureg->r12); + iprint("r13\t%#16.16llux\n", ureg->r13); + iprint("r14\t%#16.16llux\n", ureg->r14); + iprint("r15\t%#16.16llux\n", ureg->r15); + iprint("ds %#4.4ux es %#4.4ux fs %#4.4ux gs %#4.4ux\n", + ureg->ds, ureg->es, ureg->fs, ureg->gs); + iprint("ureg fs\t%#ux\n", *(unsigned int *)&ureg->ds); + iprint("type\t%#llux\n", ureg->type); + iprint("error\t%#llux\n", ureg->error); + iprint("pc\t%#llux\n", ureg->ip); + iprint("cs\t%#llux\n", ureg->cs); + iprint("flags\t%#llux\n", ureg->flags); + iprint("sp\t%#llux\n", ureg->sp); + iprint("ss\t%#llux\n", ureg->ss); + iprint("type\t%#llux\n", ureg->type); +// iprint("FS\t%#llux\n", rdmsr(FSbase)); +// iprint("GS\t%#llux\n", rdmsr(GSbase)); + + iprint("m\t%#16.16p\nup\t%#16.16p\n", m, up); +} + +void +dumpregs(Ureg* ureg) +{ + dumpgpr(ureg); + + /* + * Processor control registers. + * If machine check exception, time stamp counter, page size extensions + * or enhanced virtual 8086 mode extensions are supported, there is a + * CR4. If there is a CR4 and machine check extensions, read the machine + * check address and machine check type registers if RDMSR supported. 
+ */ + iprint("cr0\t%#16.16llux\n", getcr0()); + iprint("cr2\t%#16.16llux\n", getcr2()); + iprint("cr3\t%#16.16llux\n", getcr3()); + +// archdumpregs(); +} + +void +cmd_machvec(int, char**) +{ + int i; + Mach *mach; + Intrtime *t; + + print("%s %-10s %-18s\n", "mach", "count", "cycles"); + prflush(); + + for(i = 0; i < conf.nmach; i++){ + if((mach = sys->machptr[i]) == nil) + continue; + t = machtimes + i; + print("%d/%d %-10lld %-18lld\n", i, mach->machno, t->count, t->cycles); + prflush(); + } +} + +/*static*/ void +cmd_vec(int argc, char **argv) +{ + char aff[8], tbdf[16], *prefix; + int i; + Vctl *v; + Intrtime *t; + + USED(argc, argv); + print("%s %s %-16s %-10s %-18s %s %s\n", + "vec", "aff", "tbdf", "count", "cycles", "type", "name"); + prflush(); + + for(i = 0; i < 256; i++){ + prefix = ""; + t = intrtimes + i; + for(v = vctl[i]; v != nil; v = v->next){ + if(strcmp(v->type, "trap") == 0 || strcmp(v->type, "lapic") == 0){ + snprint(aff, sizeof aff, "--"); + snprint(tbdf, sizeof tbdf, "--"); + } + else{ + snprint(aff, sizeof aff, "%d", v->affinity); + snprint(tbdf, sizeof tbdf, "%τ", v->tbdf); + } + print("%s%d %s %-16s %-10lld %-18lld %s %s\n", + prefix, i, aff, tbdf, t->count, t->cycles, v->type, v->name); + prflush(); + prefix = "*"; + } + } +} --- /sys/src/fs/amd64/vsvm.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/vsvm.c Tue Aug 27 16:10:56 2013 @@ -0,0 +1,189 @@ +/* + * Vestigial Segmented Virtual Memory. + * To do: + * dynamic allocation and free of descriptors; + * IST should perhaps point to a different handler; + * user-level descriptors (if not dynamic). + */ +#include "all.h" + +#include "amd64.h" +#include "ureg.h" + +typedef struct Gd Gd; +typedef u64int Sd; +typedef u16int Ss; +typedef struct Tss Tss; + +struct Gd { + Sd sd; + u64int hi; +}; + +struct Tss { + u32int _0_; + u32int rsp0[2]; + u32int rsp1[2]; + u32int rsp2[2]; + u32int _28_[2]; + u32int ist[14]; + u16int _92_[5]; + u16int iomap; +}; + +enum { + Ngdt = 16, /* max. 
entries in gdt */ + Nidt = 256, /* max. entries in idt */ +}; + +static Sd gdt64[Ngdt] = { + 0ull, /* NULL descriptor */ + SdL|SdP|SdDPL0|SdS|SdCODE, /* CS */ + SdG|SdD|SdP|SdDPL0|SdS|SdW, /* DS */ + SdG|SdD|SdP|SdDPL3|SdS|SdCODE|SdR|Sd4G, /* User CS 32-bit */ + SdG|SdD|SdP|SdDPL3|SdS|SdW|Sd4G, /* User DS */ + SdL|SdP|SdDPL3|SdS|SdCODE, /* User CS 64-bit */ + + 0ull, /* FS */ + 0ull, /* GS */ + + 0ull, /* TSS lower */ + 0ull, /* TSS upper */ +}; +static int ngdt64 = 10; + +static Gd idt64[Nidt]; + +static Sd +mksd(u64int base, u64int limit, u64int bits, u64int* upper) +{ + Sd sd; + + sd = bits; + sd |= (((limit & 0x00000000000f0000ull)>>16)<<48) + |(limit & 0x000000000000ffffull); + sd |= (((base & 0x00000000ff000000ull)>>24)<<56) + |(((base & 0x0000000000ff0000ull)>>16)<<32) + |((base & 0x000000000000ffffull)<<16); + if(upper != nil) + *upper = base>>32; + + return sd; +} + +static void +mkgd(Gd* gd, u64int offset, Ss ss, u64int bits, int ist) +{ + Sd sd; + + sd = bits; + sd |= (((offset & 0x00000000ffff0000ull)>>16)<<48) + |(offset & 0x000000000000ffffull); + sd |= ((ss & 0x000000000000ffffull)<<16); + sd |= (ist & (SdISTM>>32))<<32; + gd->sd = sd; + gd->hi = offset>>32; +} + +static void +idtinit(Gd *gd, uintptr offset) +{ + int ist, v; + u64int dpl; + + for(v = 0; v < Nidt; v++){ + ist = 0; + dpl = SdP|SdDPL0|SdIG; + switch(v){ + default: + break; + case IdtBP: /* #BP */ + dpl = SdP|SdDPL3|SdIG; + break; + case IdtDF: /* #DF */ + ist = 1; + break; + } + mkgd(gd, offset, SSEL(SiCS, SsTIGDT|SsRPL0), dpl, ist); + gd++; + offset += 6; + } +} + +void +tssrsp0(uintptr sp) +{ + Tss *tss; + + tss = m->tss; + tss->rsp0[0] = sp; + tss->rsp0[1] = sp>>32; +} + +static void +tssinit(uintptr sp) +{ + int ist; + Tss *tss; + + tss = m->tss; + memset(tss, 0, sizeof(Tss)); + + tssrsp0(sp); + + sp = PTR2UINT(m->vsvm+PGSZ); + for(ist = 0; ist < 14; ist += 2){ + tss->ist[ist] = sp; + tss->ist[ist+1] = sp>>32; + } + tss->iomap = 0xdfff; +} + +static void +syscallentry(void) +{ + 
print("syscall: *gok*\n"); +} + +void +vsvminit(int size) +{ + Sd *sd; + u64int r; + + if(m->machno == 0){ + idtinit(idt64, PTR2UINT(idthandlers)); + } + + m->gdt = m->vsvm; + memmove(m->gdt, gdt64, sizeof(gdt64)); + m->tss = &m->vsvm[ROUNDUP(sizeof(gdt64), 16)]; + + sd = &((Sd*)m->gdt)[SiTSS]; + *sd = mksd(PTR2UINT(m->tss), sizeof(Tss)-1, SdP|SdDPL0|SdaTSS, sd+1); + + tssinit(m->stack+size); + + gdtput(sizeof(gdt64)-1, PTR2UINT(m->gdt), SSEL(SiCS, SsTIGDT|SsRPL0)); + idtput(sizeof(idt64)-1, PTR2UINT(idt64)); + trput(SSEL(SiTSS, SsTIGDT|SsRPL0)); + + wrmsr(FSbase, 0ull); + wrmsr(GSbase, PTR2UINT(&sys->machptr[m->machno])); + wrmsr(KernelGSbase, 0ull); + + r = rdmsr(Efer); + r |= Sce; + wrmsr(Efer, r); + r = ((u64int)SSEL(SiU32CS, SsRPL3))<<48; + r |= ((u64int)SSEL(SiCS, SsRPL0))<<32; + wrmsr(Star, r); + wrmsr(Lstar, PTR2UINT(syscallentry)); + wrmsr(Sfmask, If); +} + +//int +//userureg(Ureg* ureg) +//{ +// return ureg->cs == SSEL(SiUCS, SsRPL3); +//} --- /sys/src/fs/amd64/ether82598.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/ether82598.c Tue Aug 27 16:10:57 2013 @@ -0,0 +1,985 @@ +/* + * intel 10gbe pcie driver + * copyright © 2007—2012, coraid, inc. 
+ */ +#include "all.h" +#include "io.h" +#include "../ip/ip.h" +#include "etherif.h" + +enum{ + /* general */ + Ctrl = 0x00000/4, /* Device Control */ + Status = 0x00008/4, /* Device Status */ + Ctrlext = 0x00018/4, /* Extended Device Control */ + Esdp = 0x00020/4, /* extended sdp control */ + Esodp = 0x00028/4, /* extended od sdp control */ + Ledctl = 0x00200/4, /* led control */ + Tcptimer = 0x0004c/4, /* tcp timer */ + Ecc = 0x110b0/4, /* errata ecc control magic */ + + /* nvm */ + Eec = 0x10010/4, /* eeprom/flash control */ + Eerd = 0x10014/4, /* eeprom read */ + Fla = 0x1001c/4, /* flash access */ + Flop = 0x1013c/4, /* flash opcode */ + Grc = 0x10200/4, /* general rx control */ + + /* interrupt */ + Icr = 0x00800/4, /* interrupt cause read */ + Ics = 0x00808/4, /* " set */ + Ims = 0x00880/4, /* " mask read/set */ + Imc = 0x00888/4, /* " mask clear */ + Iac = 0x00810/4, /* " auto clear */ + Iam = 0x00890/4, /* " auto mask enable */ + Itr = 0x00820/4, /* " throttling rate (0-19) */ + Ivar = 0x00900/4, /* " vector allocation regs. */ + /*msi interrupt */ + Msixt = 0x0000/4, /* msix table (bar3) */ + Msipba = 0x2000/4, /* msix pending bit array (bar3) */ + Pbacl = 0x11068/4, /* pba clear */ + Gpie = 0x00898/4, /* general purpose int enable */ + + /* flow control */ + Pfctop = 0x03008/4, /* priority flow ctl type opcode */ + Fcttv = 0x03200/4, /* " transmit timer value (0-3) */ + Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */ + Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */ + Rcrtv = 0x032a0/4, /* " refresh value threshold */ + Tfcs = 0x0ce00/4, /* " tx status */ + + /* rx dma */ + Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */ + Rbah = 0x01004/4, /* " high */ + Rdlen = 0x01008/4, /* " length */ + Rdh = 0x01010/4, /* " head */ + Rdt = 0x01018/4, /* " tail */ + Rxdctl = 0x01028/4, /* " control */ + + Srrctl = 0x02100/4, /* split and replication rx ctl. 
*/ + Dcarxctl = 0x02200/4, /* rx dca control */ + Rdrxctl = 0x02f00/4, /* rx dma control */ + Rxpbsize = 0x03c00/4, /* rx packet buffer size */ + Rxctl = 0x03000/4, /* rx control */ + Dropen = 0x03d04/4, /* drop enable control */ + + /* rx */ + Rxcsum = 0x05000/4, /* rx checksum control */ + Rfctl = 0x04008/4, /* rx filter control */ + Mta = 0x05200/4, /* multicast table array (0-127) */ + Ral = 0x05400/4, /* rx address low */ + Rah = 0x05404/4, + Psrtype = 0x05480/4, /* packet split rx type. */ + Vfta = 0x0a000/4, /* vlan filter table array. */ + Fctrl = 0x05080/4, /* filter control */ + Vlnctrl = 0x05088/4, /* vlan control */ + Msctctrl = 0x05090/4, /* multicast control */ + Mrqc = 0x05818/4, /* multiple rx queues cmd */ + Vmdctl = 0x0581c/4, /* vmdq control */ + Imir = 0x05a80/4, /* immediate irq rx (0-7) */ + Imirext = 0x05aa0/4, /* immediate irq rx ext */ + Imirvp = 0x05ac0/4, /* immediate irq vlan priority */ + Reta = 0x05c00/4, /* redirection table */ + Rssrk = 0x05c80/4, /* rss random key */ + + /* tx */ + Tdbal = 0x06000/4, /* tx desc base low +0x40n */ + Tdbah = 0x06004/4, /* " high */ + Tdlen = 0x06008/4, /* " len */ + Tdh = 0x06010/4, /* " head */ + Tdt = 0x06018/4, /* " tail */ + Txdctl = 0x06028/4, /* " control */ + Tdwbal = 0x06038/4, /* " write-back address low */ + Tdwbah = 0x0603c/4, + + Dtxctl = 0x04a80/4, /* tx dma control !82598 */ + Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */ + Tipg = 0x0cb00/4, /* tx inter-packet gap */ + Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */ + + /* mac */ + Hlreg0 = 0x04240/4, /* highlander control reg 0 */ + Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */ + Msca = 0x0425c/4, /* mdi signal cmd & addr */ + Msrwd = 0x04260/4, /* mdi single rw data */ + Mhadd = 0x04268/4, /* mac addr high & max frame */ + Pcss1 = 0x04288/4, /* xgxs status 1 */ + Pcss2 = 0x0428c/4, + Xpcss = 0x04290/4, /* 10gb-x pcs status */ + Serdesc = 0x04298/4, /* serdes control */ + Macs = 0x0429c/4, /* fifo control & 
report */ + Autoc = 0x042a0/4, /* autodetect control & status */ + Links = 0x042a4/4, /* link status */ + Autoc2 = 0x042a8/4, +}; + +enum{ + /* Ctrl */ + Rst = 1<<26, /* full nic reset */ + + /* Txdctl */ + Ten = 1<<25, + + /* Dtxctl */ + Den = 1<<0, + + /* Fctrl */ + Rfce = 1<<15, /* rcv flow control enable */ + Dpf = 1<<13, /* discard pause frames */ + Bam = 1<<10, /* broadcast accept mode */ + Upe = 1<<9, /* unicast promiscuous */ + Mpe = 1<<8, /* multicast promiscuous */ + + /* Rxdctl */ + Pthresh = 0, /* prefresh threshold shift in bits */ + Hthresh = 8, /* host buffer minimum threshold " */ + Wthresh = 16, /* writeback threshold */ + Renable = 1<<25, + + /* Rxctl */ + Rxen = 1<<0, + Dmbyps = 1<<1, + + /* Rdrxctl */ + Rdmt½ = 0, + Rdmt¼ = 1, + Rdmt⅛ = 2, + + /* Rxcsum */ + Ippcse = 1<<12, /* ip payload checksum enable */ + + /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ + + /* interrupts */ + Irx0 = 1<<0, /* driver defined */ + Itx0 = 1<<1, /* driver defined */ + Lsc = 1<<20, /* link status change */ + Ioc = 1<<31, /* other cause */ + + /* Links */ + Lnkup = 1<<30, + Lnkspd8 = 1<<29, + Lnkspd9 = 3<<28, + + /* Hlreg0 */ + Txcrcen = 1<<0, + Jumboen = 1<<2, + + /* Ivar */ + Ivtx = 1|1<<7, /* transmit interrupt */ + Ivrx = 0|1<<7, /* receive interrupt */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlrtype Ctlrtype; +typedef struct Rd Rd; +typedef struct Rbpool Rbpool; +typedef struct Stat Stat; +typedef struct Td Td; + +enum { + i82598, + i82599, + x540, + Nctlrtype, +}; + +struct Ctlrtype { + int type; + int mtu; + int flag; + char *name; +}; + +enum { + Fphyoc = 1<<0, /* phy link needs other cause interrupt */ + Fsplitivar = 1<<1, /* tx and rx use different ivar entries */ + Fphyspd = 1<<2, /* phy speed useful (part supports <10gbe) */ + Ftxctl = 1<<3, /* part has txctl register */ +}; + +/* real mtu is 12k. 
use standard 9k to save memory */ +static Ctlrtype cttab[Nctlrtype] = { + i82598, 9*1024, Fsplitivar|Fphyoc, "i82598", + i82599, 9*1024, Fphyspd|Ftxctl, "i82599", + x540, 9*1024, Fphyspd|Ftxctl, "x540", +}; + +/* status */ +enum{ + Pif = 1<<7, /* past exact filter (sic) */ + Ipcs = 1<<6, /* ip checksum calcuated */ + L4cs = 1<<5, /* layer 2 */ + Tcpcs = 1<<4, /* tcp checksum calcuated */ + Vp = 1<<3, /* 802.1q packet matched vet */ + Ixsm = 1<<2, /* ignore checksum */ + Reop = 1<<1, /* end of packet */ + Rdd = 1<<0, /* descriptor done */ +}; + +struct Rd { + u32int addr[2]; + u16int length; + u16int cksum; + uchar status; + uchar errors; + u16int vlan; +}; + +enum{ + /* Td cmd */ + Rs = 1<<3, + Ic = 1<<2, + Ifcs = 1<<1, + Teop = 1<<0, + + /* Td status */ + Tdd = 1<<0, +}; + +struct Td { + u32int addr[2]; + u16int length; + uchar cso; + uchar cmd; + uchar status; + uchar css; + u16int vlan; +}; + +enum{ + Factive = 1<<0, + Fstarted = 1<<1, +}; + +typedef void (*Freefn)(Msgbuf*); + +struct Ctlr { + Pcidev *p; + uintmem port; + u32int *reg; + uchar flag; + uint poolno; + Rbpool *pool; + int nrd, ntd, nrb, rbsz; + QLock slock, alock, tlock; + Rendez lrendez, trendez, rrendez; + uint im, lim, rim, xtim; + Lock imlock; + char *alloc; + Rd *rdba; + Msgbuf **rb; + uint rdt, rdfree; + Td *tdba; + uint tdh, tdt; + Msgbuf **tb; + uchar ra[Easize]; + uchar mta[128]; +// uvlong stats[nelem(stattab)]; + int type; + uint speeds[4]; + uint nobufs; + + char tname[28]; + char rname[28]; +}; + +struct Rbpool { + union { + struct { + Lock; + Msgbuf *b; + uint nstarve; + uint nwakey; + uint starve; + Rendez; + }; + uchar pad[64]; /* cacheline */ + }; + union { + struct { + Msgbuf *x; + uint nfast; + uint nslow; + }; + uchar pad[64]; /* cacheline */ + }; +}; + +/* tweakable parameters */ +enum{ + Nrd = 256, + Ntd = 256, + Nrb = 2048, + Nctlr = 8, + Rbalign = 8, /* ideally, 4k */ +}; + +static Ctlr *ctlrtab[Nctlr]; +static Lock rblock[Nctlr]; +static Rbpool rbtab[Nctlr]; +static int 
nctlr; + +char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} + +static void +im(Ctlr *c, int i) +{ + ilock(&c->imlock); + c->im |= i; + c->reg[Ims] = c->im; + iunlock(&c->imlock); +} + +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Msgbuf* +rballoc(Rbpool *p) +{ + Msgbuf *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + b->flags &= ~FREE; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->starve = 1; + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Msgbuf *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->data = (uchar*)ROUNDUP((uintptr)b->xdata, Rbalign); + b->count = 0; + b->flags = FREE; + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(1) + print("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Msgbuf *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Msgbuf *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Msgbuf *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Msgbuf *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Msgbuf *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Msgbuf *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Msgbuf *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Msgbuf *b) +{ + rbfree(b, 7); +} + +static Freefn freetab[Nctlr] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + rbfree6, + rbfree7, +}; + +#define Next(x, m) (((x)+1) & (m)) +static int +cleanup(Ctlr *c, int tdh) +{ + Msgbuf *b; + uint m, n; + + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status&Tdd){ + tdh = n; + b = c->tb[tdh]; + c->tb[tdh] = 0; + mbfree(b); + c->tdba[tdh].status = 0; + } + return tdh; +} + +static void +transmit(Ether *e) +{ + uint i, m, tdt, tdh; + Ctlr *c; + Msgbuf *b; + 
Td *t; + + c = e->ctlr; +// qlock(&c->tlock); + if(!canqlock(&c->tlock)){ + im(c, Itx0); + return; + } + tdh = c->tdh = cleanup(c, c->tdh); + tdt = c->tdt; + m = c->ntd-1; + for(i = 0; i<8; i++){ + if(Next(tdt, m) == tdh){ + im(c, Itx0); + break; + } + if((b = etheroq(e)) == nil) + break; + t = c->tdba+tdt; + t->addr[0] = Pciwaddrl(b->data); + t->addr[1] = Pciwaddrh(b->data); + t->length = b->count; + t->cmd = Rs|Ifcs|Teop; + c->tb[tdt] = b; + tdt = Next(tdt, m); + } + if(i){ + c->tdt = tdt; + coherence(); + c->reg[Tdt] = tdt; + } + qunlock(&c->tlock); +} +
+/*
+ * Sleep condition for tproc: true once c->xtim is non-zero.
+ */
+static int
+xtim(void *c)
+{
+	return ((Ctlr*)c)->xtim != 0;
+}
+
+/*
+ * Transmit process.  The Ether is taken from u->arg.  Sleeps on
+ * c->trendez until xtim is set, clears it, calls transmit() and
+ * repeats forever.
+ */
+static void
+tproc(void)
+{
+	Ether *e;
+	Ctlr *c;
+
+	e = u->arg;
+	c = e->ctlr;
+loop:
+	sleep(&c->trendez, xtim, c);	/* transmit kicks us */
+	c->xtim = 0;
+	transmit(e);
+	goto loop;
+}
+
+/*
+ * Set up the receive side: disable rx, release any buffers still
+ * held in c->rb, program filter, buffer-size and descriptor ring
+ * registers, then re-enable rx.
+ */
+static void
+rxinit(Ctlr *c)
+{
+	Msgbuf *b;
+	int i;
+
+	c->reg[Rxctl] &= ~Rxen;
+	/* loop bound restored: the text between '<' and "->" had been lost */
+	for(i = 0; i<c->nrd; i++){
+		b = c->rb[i];
+		c->rb[i] = 0;
+		if(b)
+			mbfree(b);
+	}
+	c->rdfree = 0;
+
+	c->reg[Fctrl] |= Bam|Rfce|Dpf;
+	/*
+	 * NOTE(review): Ipcs is declared with the descriptor status
+	 * bits; the bit this file lists under Rxcsum is Ippcse.
+	 * Left unchanged; confirm which was intended.
+	 */
+	c->reg[Rxcsum] |= Ipcs;
+	c->reg[Srrctl] = (c->rbsz+1023)/1024;
+	c->reg[Mhadd] = c->rbsz<<16;
+	c->reg[Hlreg0] |= Txcrcen|Jumboen;
+
+	c->reg[Rbal] = PCIWADDR(c->rdba);
+	c->reg[Rbah] = 0;
+	c->reg[Rdlen] = c->nrd*sizeof(Rd);
+	c->reg[Rdh] = 0;
+	c->reg[Rdt] = c->rdt = 0;
+
+	c->reg[Rdrxctl] = Rdmt¼;
+	/*
+	 * NOTE(review): the right-hand side after "8<" was lost in
+	 * this copy.  It is reconstructed from the Rxdctl shift names
+	 * declared in this file (Pthresh, Hthresh, Wthresh, Renable);
+	 * verify the threshold values against the original patch.
+	 */
+	c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
+	c->reg[Rxctl] |= Rxen|Dmbyps;
+}
+
+static int +replenish(Ctlr *c, uint rdh, int maysleep) +{ + int rdt, m, i; + Msgbuf *b; + Rd *r; + Rbpool *p; + + m = c->nrd-1; + i = 0; + p = c->pool; + for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){ + r = c->rdba+rdt; + while((b = rballoc(c->pool)) == nil){ + c->nobufs++; + if(maysleep == 0) + goto nobufs; + if(1){ + print("%s:%d: starve\n", cname(c), c->poolno); + } + sleep(p, icansleep, p); + } + c->rb[rdt] = b; + r->addr[0] = Pciwaddrl(b->data); + r->addr[1] = Pciwaddrh(b->data); + r->status = 0; + c->rdfree++; + i++; + } +nobufs: + if(i){ + coherence(); + c->reg[Rdt] =
c->rdt = rdt; + } + if(rdt == rdh) + return -1; + return 0; +} +
+/*
+ * Sleep condition for rproc: true once c->rim is non-zero.
+ */
+static int
+rim(void *v)
+{
+	return ((Ctlr*)v)->rim != 0;
+}
+
+/*
+ * Receive process.  The Ether is taken from u->arg.
+ * At loop: refill the ring (may sleep for buffers), enable the rx
+ * interrupt and sleep until rim is set.  At loop1: pass each
+ * completed descriptor's buffer to etheriq() until a descriptor
+ * without Rdd is found, then go back to loop.
+ */
+static void
+rproc(void)
+{
+	Ether *e;
+	Ctlr *c;
+	Msgbuf *b;
+	Rd *r;
+	uint m, rdh;
+
+	e = u->arg;
+	c = e->ctlr;
+	m = c->nrd-1;
+	rdh = 0;
+loop:
+	replenish(c, rdh, 1);
+	im(c, Irx0);
+	sleep(&c->rrendez, rim, c);
+loop1:
+	c->rim = 0;
+	/* top up without sleeping once 16 or more slots are empty */
+	if(c->nrd-c->rdfree >= 16)
+		if(replenish(c, rdh, 0) == -1)
+			goto loop;
+	r = c->rdba+rdh;
+	if(!(r->status&Rdd))
+		goto loop;
+	b = c->rb[rdh];
+	c->rb[rdh] = 0;
+	b->count = r->length;
+	if(!(r->status&Ixsm)){
+		if(r->status&Ipcs)
+			b->flags |= Bipck;
+		if(r->status&Tcpcs)
+			b->flags |= Btcpck|Budpck;
+		// b->checksum = r->cksum;
+	}
+	r->status = 0;
+	etheriq(e, b);
+	c->rdfree--;
+	rdh = Next(rdh, m);
+	goto loop1;
+}
+
+/*
+ * Mask all interrupts and reset the controller.  Waits up to 100
+ * iterations of delay(1) for Rst to clear; returns -1 on timeout.
+ * On success, clears the state that the reset leaves behind and
+ * returns 0.
+ */
+static int
+detach(Ctlr *c)
+{
+	int i;
+
+	c->reg[Imc] = ~0;
+	c->reg[Ctrl] |= Rst;
+	for(i = 0; i < 100; i++){
+		delay(1);
+		if((c->reg[Ctrl]&Rst) == 0)
+			goto good;
+	}
+	return -1;
+good:
+	/* errata */
+	delay(50);
+	c->reg[Ecc] &= ~(1<<21|1<<18|1<<9|1<<6);
+
+	/*
+	 * not cleared by reset; kill it manually.
+	 * The loop used to write c->reg[Rah] on every pass, so only
+	 * entry 0 was ever touched.  Index entries 1..15 instead.
+	 * Each entry is a Ral/Rah pair, i.e. 2 words apart in reg[]
+	 * (NOTE(review): stride taken from the Intel datasheet
+	 * register layout, 0x5404 + 8*n; confirm for all supported
+	 * parts).
+	 */
+	for(i = 1; i<16; i++)
+		c->reg[Rah+2*i] &= ~(1<<31);
+	for(i = 0; i<128; i++)
+		c->reg[Mta+i] = 0;
+	for(i = 1; i<640; i++)
+		c->reg[Vfta+i] = 0;
+	return 0;
+}
+
+/*
+ * Shut the interface down by resetting its controller.
+ */
+static void
+shutdown(Ether *e)
+{
+	detach(e->ctlr);
+}
+
+/*
+ * Read EEPROM word i: start the read through Eerd, spin until
+ * EEdone is set, and return the upper 16 bits of Eerd.
+ * There is no timeout; this spins for as long as EEdone stays clear.
+ */
+/* ≤ 20ms */
+static ushort
+eeread(Ctlr *c, int i)
+{
+	c->reg[Eerd] = EEstart|i<<2;
+	while((c->reg[Eerd]&EEdone) == 0)
+		;
+	return c->reg[Eerd]>>16;
+}
+
+/*
+ * Verify the EEPROM and load the station address into c->ra.
+ * Returns -1 if word 0 lacks the expected signature bits or if the
+ * 16-bit sum over the header and the sections it points to is not
+ * 0xbaba; returns 0 on success.
+ */
+static int
+eeload(Ctlr *c)
+{
+	ushort u, v, p, l, i, j;
+
+	if((eeread(c, 0)&0xc0) != 0x40)
+		return -1;
+	u = 0;
+	for(i = 0; i < 0x40; i++)
+		u += eeread(c, i);
+	for(i = 3; i < 0xf; i++){
+		if(c->type == x540 && (i == 4 || i == 5))
+			continue;
+		p = eeread(c, i);
+		l = eeread(c, p++);
+		if((int)p+l+1 > 0xffff)
+			continue;
+		for(j = p; j < p+l; j++)
+			u += eeread(c, j);
+	}
+	if(u != 0xbaba)
+		return -1;
+	/* Status bit 3 selects which pointer word (10 or 9) is used */
+	if(c->reg[Status]&1<<3)
+		u = eeread(c, 10);
+	else
+		u = eeread(c, 9);
+	u++;
+	/*
+	 * NOTE(review): the loop header and first statement were lost
+	 * in this copy (only "c->ra[i++] = v; c->ra[i++] = v>>8; }"
+	 * survived).  Reconstructed as one EEPROM word per two
+	 * address bytes; verify against the original patch.
+	 */
+	for(i = 0; i<Easize;){
+		v = eeread(c, u+i/2);
+		c->ra[i++] = v;
+		c->ra[i++] = v>>8;
+	}
+	c->ra[5] += (c->reg[Status]&0xc)>>2;
+	return 0;
+}
+
+/*
+ * Reset the controller, load the station address from EEPROM and
+ * program receive address 0, flow control, interrupt vector mapping
+ * and interrupt throttling.  Returns -1 (after printing a message)
+ * if detach() or eeload() fails, 0 otherwise.
+ */
+static int
+reset(Ctlr *c)
+{
+	uchar *p;
+	int i;
+
+	if(detach(c)){
+		print("%s: reset timeout\n", cname(c));
+		return -1;
+	}
+	if(eeload(c)){
+		print("%s: eeprom failure\n", cname(c));
+		return -1;
+	}
+	p = c->ra;
+	c->reg[Ral] = p[3]<<24|p[2]<<16|p[1]<<8|p[0];
+	c->reg[Rah] = p[5]<<8|p[4]|1<<31;
+
+	c->reg[Ctrlext] |= 1<<16;
+	/* make some guesses for flow control */
+	c->reg[Fcrtl] = 0x10000|1<<31;
+	c->reg[Fcrth] = 0x40000|1<<31;
+	c->reg[Rcrtv] = 0x6000;
+
+	/* configure interrupt mapping (don't ask) */
+	if(cttab[c->type].flag & Fsplitivar){
+		c->reg[Ivar+0] = Ivrx;
+		c->reg[Ivar+64/4] = Ivtx;
+//		c->reg[Ivar+97/4] = (2|1<<7)<<8*(97%4);
+	}else
+		c->reg[Ivar+0] = Ivtx<<8 | Ivrx;
+
+	/*
+	 * interrupt throttling goes here.
+	 * NOTE(review): the loop bound was lost in this copy.  It is
+	 * reconstructed as the 20 Itr registers (0-19) noted where
+	 * Itr is declared; verify against the original patch.
+	 */
+	for(i = Itr; i<Itr+20; i++)
+		c->reg[i] = 128;		/* ¼µs intervals */
+	c->reg[Itr+Itx0] = 256;
+	return 0;
+}
+
+/*
+ * Set up the transmit side: release any buffers still held in
+ * c->tb, clear the descriptor ring, program the ring registers and
+ * enable transmit.
+ */
+static void
+txinit(Ctlr *c)
+{
+	Msgbuf *b;
+	int i;
+
+	/*
+	 * NOTE(review): the rest of this assignment and the loop
+	 * header were lost in this copy ("16<ntd; i++){").  The
+	 * thresholds are reconstructed with the shift names declared
+	 * in this file; verify against the original patch.
+	 */
+	c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
+	for(i = 0; i<c->ntd; i++){
+		b = c->tb[i];
+		c->tb[i] = 0;
+		if(b)
+			mbfree(b);
+	}
+	memset(c->tdba, 0, c->ntd*sizeof(Td));
+	c->reg[Tdbal] = PCIWADDR(c->tdba);
+	c->reg[Tdbah] = 0;
+	c->reg[Tdlen] = c->ntd*sizeof(Td);
+	c->reg[Tdh] = 0;
+	c->reg[Tdt] = 0;
+	c->tdh = c->ntd-1;
+	c->tdt = 0;
+	if(cttab[c->type].flag & Ftxctl)
+		c->reg[Dtxctl] |= Den;
+	c->reg[Txdctl] |= Ten;
+}
+
+/*
+ * One-time bring-up of an interface.  The first caller (c->alloc
+ * still nil, checked under c->alock) allocates one chunk holding
+ * both descriptor rings and both Msgbuf pointer arrays, creates the
+ * receive buffer pool, initialises rx and tx, and starts the
+ * receive and transmit processes.  Later calls return at once.
+ */
+static void
+attach(Ether *e)
+{
+	Ctlr *c;
+	int t;
+
+	c = e->ctlr;
+	qlock(&c->alock);
+	if(c->alloc){
+		qunlock(&c->alock);
+		return;
+	}
+
+	c->nrd = Nrd;
+	c->ntd = Ntd;
+	/* +255 per ring leaves room to round each ring up to 256 bytes */
+	t = c->nrd*sizeof *c->rdba+255;
+	t += c->ntd*sizeof *c->tdba+255;
+	t += (c->ntd+c->nrd)*sizeof(Msgbuf*);
+	c->alloc = ialloc(t, 0);
+	qunlock(&c->alock);
+
+	c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
+	c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba+c->nrd), 256);
+	c->rb = (Msgbuf**)(c->tdba+c->ntd);
+	c->tb = (Msgbuf**)(c->rb+c->nrd);
+
+	mballocpool(Nrb, c->rbsz+Rbalign, Rbalign, Mbeth10gbebg, freetab[c->poolno]);
+
+	rxinit(c);
+	txinit(c);
+
+	sprint(c->rname, "#l%dr", e->ctlrno);
+	userinit(rproc, e, c->rname);
+	sprint(c->tname, "#l%dt", e->ctlrno);
+	userinit(tproc, e, c->tname);
+}
+
+/*
+ * Interrupt handler; v is the Ether.  Masks every cause, then for
+ * each enabled cause that is pending: removes it from the mask,
+ * records it in lim/rim/xtim and wakes the matching Rendez.
+ * Finally re-enables the causes that remain in the mask.
+ */
+static void
+interrupt(Ureg*, void *v)
+{
+	Ether *e;
+	Ctlr *c;
+	int icr, im;
+
+	e = v;
+	c = e->ctlr;
+	ilock(&c->imlock);
+	c->reg[Imc] = ~0;
+	im = c->im;
+	while(icr = c->reg[Icr]&c->im){
+		if(icr&Lsc){
+			im &= ~Lsc;
+			c->lim = icr&Lsc;
+			wakeup(&c->lrendez);
+		}
+		if(icr&Irx0){
+			im &= ~Irx0;
+			c->rim = icr&Irx0;
+			wakeup(&c->rrendez);
+		}
+		if(icr&Itx0){
+			im &= ~Itx0;
+			c->xtim = icr&Itx0;
+			wakeup(&c->trendez);
+		}
+	}
+	c->reg[Ims] = c->im = im;
+	iunlock(&c->imlock);
+}
+
+/*
+ * If the low 16 bits of the PciSVID config word are 0x1b52 and the
+ * device id is 1, replace the device id with the high 16 bits of
+ * that word.
+ */
+static void
+hbafixup(Pcidev *p)
+{
+	uint i;
+
+	i = pcicfgr32(p, PciSVID);
+	if((i & 0xffff) == 0x1b52 && p->did == 1)
+		p->did = i>>16;
+}
+
+static void +scan(void) +{ + char *name;
+ uintmem io; + int type; + void *mem; + Ctlr *c; + Pcidev *p; + + p = 0; + while(p = pcimatch(p, 0x8086, 0)){ + hbafixup(p); + switch(p->did){ + case 0x10c6: /* 82598 af dual port */ + case 0x10c7: /* 82598 af single port */ + case 0x10b6: /* 82598 backplane */ + case 0x10dd: /* 82598 at cx4 */ + case 0x10ec: /* 82598 at cx4 */ + type = i82598; + break; + case 0x10f7: /* 82599 kx/kx4 */ + case 0x10f8: /* 82599 backplane */ + case 0x10f9: /* 82599 cx4 */ + case 0x10fb: /* 82599 sfi/sfp+ */ + case 0x10fc: /* 82599 xaui */ + case 0x151c: /* 82599 base t kx/kx4 “niantic” */ + type = i82599; + break; + case 0x1528: /* x540-at2 “twinville” */ + type = x540; + break; + default: + continue; + } + name = cttab[type].name; + if(nctlr == nelem(ctlrtab)){ + print("%s: %τ: too many controllers\n", name, p->tbdf); + return; + } + io = p->mem[0].bar&~0xf; + mem = vmap(io, p->mem[0].size); + if(mem == 0){ + print("%s: %τ: cant map bar\n", name, p->tbdf); + continue; + } + c = ialloc(sizeof *c, 0); + c->p = p; + c->port = io; + c->reg = (u32int*)mem; + c->rbsz = cttab[type].mtu; + c->type = type; + if(reset(c)){ + print("%s: %τ: cant reset\n", name, p->tbdf); + // free(c); + // vunmap(mem, p->mem[0].size); + continue; + } + pcisetbme(p); + c->poolno = nctlr; + c->pool = rbtab + c->poolno; + ctlrtab[nctlr++] = c; + } +} + +int +i82598pnp(Ether *e) +{ + Ctlr *c; + int i; + + if(nctlr == 0) + scan(); + for(i = 0; iflag&Factive) + continue; + if(ethercfgmatch(e, c->p, c->port) == 0) + goto found; + } + return -1; +found: + c->flag |= Factive; + e->ctlr = c; + e->port = (uintptr)c->reg; + e->irq = c->p->intl; + e->tbdf = c->p->tbdf; + e->mbps = 10000; + e->ifc.maxmtu = c->rbsz; + memmove(e->ea, c->ra, Easize); +// e->arg = e; + e->attach = attach; + e->interrupt = interrupt; + e->transmit = transmit; + + return 0; +} --- /sys/src/fs/amd64/mp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/mp.c Tue Aug 27 16:10:58 2013 @@ -0,0 +1,479 @@ +#include "all.h" +#include "io.h" +#include 
"apic.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +#define l16get(p) (((p)[1]<<8)|(p)[0]) +#define l32get(p) (((u32int)l16get(p+2)<<16)|l16get(p)) +#define l64get(p) (((u64int)l32get(p+4)<<32)|l32get(p)) + +/* + * MultiProcessor Specification Version 1.[14]. + */ +typedef struct { /* MP Floating Pointer */ + u8int signature[4]; /* "_MP_" */ + u8int addr[4]; /* PCMP */ + u8int length; /* 1 */ + u8int revision; /* [14] */ + u8int checksum; + u8int feature[5]; +} _MP_; + +typedef struct { /* MP Configuration Table */ + u8int signature[4]; /* "PCMP" */ + u8int length[2]; + u8int revision; /* [14] */ + u8int checksum; + u8int string[20]; /* OEM + Product ID */ + u8int oaddr[4]; /* OEM table pointer */ + u8int olength[2]; /* OEM table length */ + u8int entry[2]; /* entry count */ + u8int apicpa[4]; /* local APIC address */ + u8int xlength[2]; /* extended table length */ + u8int xchecksum; /* extended table checksum */ + u8int reserved; + + u8int entries[]; +} PCMP; + +typedef struct { + char type[6]; + int polarity; /* default for this bus */ + int trigger; /* default for this bus */ +} Mpbus; + +static Mpbus mpbusdef[] = { + { "PCI ", IPlow, TMlevel, }, + { "ISA ", IPhigh, TMedge, }, +}; +static Mpbus* mpbus[Nbus]; + +static void +mpintrprint(char* s, u8int* p) +{ + char buf[128], *b, *e; + char format[] = " type %d flags %#ux bus %d IRQ %d APIC %d INTIN %d\n"; + + b = buf; + e = b + sizeof(buf); + b = seprint(b, e, "mpparse: intr:"); + if(s != nil) + b = seprint(b, e, " %s:", s); + seprint(b, e, format, p[1], l16get(p+2), p[4], p[5], p[6], p[7]); + print(buf); +} + +static u32int +mpmkintr(u8int* p) +{ + u32int v; + Apic *apic; + int n, polarity, trigger; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. 
It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. + */ + if(mpbus[p[4]] == nil){ + mpintrprint("no source bus", p); + return 0; + } + if(p[6] != 0xff){ + if(Napic < 256 && p[6] >= Napic){ + mpintrprint("APIC ID out of range", p); + return 0; + } + switch(p[0]){ + default: + mpintrprint("INTIN botch", p); + return 0; + case 3: /* IOINTR */ + if((apic = ioapiclookup(p[6])) == nil){ + mpintrprint("unuseable ioapic", p); + return 0; + } + if(p[7] >= apic->nrdt){ + mpintrprint("IO INTIN out of range", p); + return 0; + } + break; + case 4: /* LINTR */ + if((apic = lapiclookup(p[6])) == nil){ + mpintrprint("unuseable lapic", p); + return 0; + } + if(p[7] >= nelem(apic->lvt)){ + mpintrprint("LOCAL INTIN out of range", p); + return 0; + } + USED(apic); + break; + } + } + n = l16get(p+2); + if((polarity = (n & 0x03)) == 2 || (trigger = ((n>>2) & 0x03)) == 2){ + mpintrprint("invalid polarity/trigger", p); + return 0; + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(p[1]){ + default: + mpintrprint("invalid type", p); + return 0; + case 0: /* INT */ + switch(polarity){ + case 0: + v |= mpbus[p[4]]->polarity; + break; + case 1: + v |= IPhigh; + break; + case 3: + v |= IPlow; + break; + } + switch(trigger){ + case 0: + v |= mpbus[p[4]]->trigger; + break; + case 1: + v |= TMedge; + break; + case 3: + v |= TMlevel; + break; + } + break; + case 1: /* NMI */ + v |= TMedge|IPhigh|MTnmi; + break; + case 2: /* SMI */ + v |= TMedge|IPhigh|MTsmi; + break; + case 3: /* ExtINT */ + v |= TMedge|IPhigh|MTei; + break; + } + + return v; +} + +static int +mpparse(PCMP* pcmp, int maxmach) +{ + u8int *e, *p; + int nmach, bustype, i, n; + u32int lo; + Apic *a; + + nmach = 0; + p = pcmp->entries; + e = ((uchar*)pcmp)+l16get(pcmp->length); + while(p < e) switch(*p){ + default: + print("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e-p); + for(i = 0; p < e; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 0: /* processor */ + /* + * Initialise the APIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, the memory mapped address comes + * from the PCMP structure as the addess is local to the + * CPU and identical for all. Indicate whether this is + * the bootstrap processor (p[3] & 0x02). 
+ */ + DBG("mpparse: cpu %d pa %#ux bp %d\n", + p[1], l32get(pcmp->apicpa), p[3] & 0x02); + if((p[3] & 0x01) != 0 && nmach < maxmach){ + nmach++; + lapicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02); + } + p += 20; + break; + case 1: /* bus */ + DBG("mpparse: bus: %d type %6.6s\n", p[1], (char*)p+2); + if(mpbus[p[1]] != nil){ + print("mpparse: bus %d already allocated\n", p[1]); + p += 8; + break; + } + for(i = 0; i < nelem(mpbusdef); i++){ + if(memcmp(p+2, mpbusdef[i].type, 6) != 0) + continue; + mpbus[p[1]] = &mpbusdef[i]; + break; + } + if(mpbus[p[1]] == nil) + print("mpparse: bus %d type %6.6s unknown\n", + p[1], (char*)p+2); + + p += 8; + break; + case 2: /* IOAPIC */ + /* + * Initialise the IOAPIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, p[4-7] is the memory mapped address. + */ + if(p[3] & 0x01) + ioapicinit(p[1], -1, l32get(p+4)); + + p += 8; + break; + case 3: /* IOINTR */ + /* + * p[1] is the interrupt type; + * p[2-3] contains the polarity and trigger mode; + * p[4] is the source bus; + * p[5] is the IRQ on the source bus; + * p[6] is the destination APIC; + * p[7] is the INITIN pin on the destination APIC. + */ + if(p[6] == 0xff){ + mpintrprint("routed to all IOAPICs", p); + p += 8; + break; + } + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + bustype = -1; + if(memcmp(mpbus[p[4]]->type, "PCI ", 6) == 0) + bustype = BusPCI; + else if(memcmp(mpbus[p[4]]->type, "ISA ", 6) == 0) + bustype = BusISA; + if(bustype != -1) + ioapicintrinit(bustype, p[4], p[6], p[7], p[5], lo); + + p += 8; + break; + case 4: /* LINTR */ + /* + * Format is the same as IOINTR above. + */ + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * Everything was checked in mpmkintr above. 
+ */ + if(p[6] == 0xff){ + for(i = 0; i < Napic; i++){ + if((a = lapiclookup(i)) == nil || a->addr != nil || p[7] >= nelem(a->lvt)) /* mpmkintr skips the INTIN range check when the APIC ID is 0xff */ + continue; + a->lvt[p[7]] = lo; + } + } + else{ +// xlapic[p[6]].lvt[p[7]] = lo; + if((a = lapiclookup(p[6])) != nil) + a->lvt[p[7]] = lo; + } + p += 8; + break; + } + + /* + * There's nothing of interest in the extended table, + * but check it for consistency. + */ + p = e; + e = p + l16get(pcmp->xlength); + while(p < e) switch(*p){ + default: + n = p[1] != 0? p[1]: e-p; /* a zero length would never advance p; consume the rest instead */ + print("mpparse: unknown extended entry %d length %d\n", *p, p[1]); + for(i = 0; i < n && p < e; i++){ /* never dump past the end of the table */ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 128: + DBG("address space mapping\n"); + DBG(" bus %d type %d base %#llux length %#llux\n", + p[2], p[3], l64get(p+4), l64get(p+12)); + p += p[1] != 0? p[1]: e-p; /* zero length: skip to the end rather than spin */ + break; + case 129: + DBG("bus hierarchy descriptor\n"); + DBG(" bus %d sd %d parent bus %d\n", + p[2], p[3], p[4]); + p += p[1] != 0? p[1]: e-p; /* zero length: skip to the end rather than spin */ + break; + case 130: + DBG("compatibility bus address space modifier\n"); + DBG(" bus %d pr %d range list %d\n", + p[2], p[3], l32get(p+4)); + p += p[1] != 0? p[1]: e-p; /* zero length: skip to the end rather than spin */ + break; + } + return nmach; +} + +static int +sigchecksum(void* address, int length) +{ + u8int *p, sum; + + sum = 0; + for(p = address; length-- > 0; p++) + sum += *p; + + return sum; +} + +static void* +sigscan(u8int* address, int length, char* signature) +{ + u8int *e, *p; + int siglength; + + e = address+length; + siglength = strlen(signature); + for(p = address; p+siglength < e; p += 16){ + if(memcmp(p, signature, siglength)) + continue; + return p; + } + + return nil; +} + +static uintptr mptab[] = {0, 1024, 639*1024, 1024, 0xf0000, 0x10000, 0, 1024}; + +static void* +sigsearch(char* signature) +{ + int i; + uintmem p; + u8int *bda; + void *r; + + /* + * Search for the data structure: + * 1) in the first KB of the EBDA; + * 2) in the last KB of system base memory; + * 3) in the BIOS ROM between 0xe0000 and 0xfffff.
+ */ + bda = BIOSSEG(0x40); + if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){ + if((p = (bda[0x0f]<<8)|bda[0x0e])){ + if((r = sigscan(BIOSSEG(p), 1024, signature)) != nil) + return r; + } + } + + if((p = (bda[0x0F]<<8|bda[0x0E])<<4) || + (p = (bda[0x14]<<8|bda[0x13])*1024-1024)) + mptab[nelem(mptab)-2] = (uintptr)p; + for(i = 0; i < nelem(mptab); i += 2) + if(r = sigscan(KADDR(mptab[i]), mptab[i+1], signature)) + return r; + return nil; +} + +void +mpsinit(int maxmach) +{ + u8int *p; + int i, n; + _MP_ *mp; + PCMP *pcmp; + +#ifdef acpiworking + mpacpi(maxmach); +#endif + + if((mp = sigsearch("_MP_")) == nil){ + print("mp: no tables\n"); + return; + } + if(DBGFLG){ + DBG("_MP_ @ %#p, addr %#ux length %ud rev %d", + mp, l32get(mp->addr), mp->length, mp->revision); + for(i = 0; i < sizeof(mp->feature); i++) + DBG(" %2.2#ux", mp->feature[i]); + DBG("\n"); + } + if(mp->revision != 1 && mp->revision != 4) + return; + if(sigchecksum(mp, mp->length*16) != 0) + return; + + if((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == nil) + return; + if(pcmp->revision != 1 && pcmp->revision != 4){ + vunmap(pcmp, sizeof(PCMP)); + return; + } + n = l16get(pcmp->length) + l16get(pcmp->xlength); + vunmap(pcmp, sizeof(PCMP)); + if((pcmp = vmap(l32get(mp->addr), n)) == nil) + return; + if(sigchecksum(pcmp, l16get(pcmp->length)) != 0){ + vunmap(pcmp, n); + return; + } + if(DBGFLG){ + DBG("PCMP @ %#p length %#ux revision %d\n", + pcmp, l16get(pcmp->length), pcmp->revision); + DBG(" %20.20s oaddr %#ux olength %#ux\n", + (char*)pcmp->string, l32get(pcmp->oaddr), + l16get(pcmp->olength)); + DBG(" entry %d apicpa %#ux\n", + l16get(pcmp->entry), l32get(pcmp->apicpa)); + + DBG(" xlength %#ux xchecksum %#ux\n", + l16get(pcmp->xlength), pcmp->xchecksum); + } + if(pcmp->xchecksum != 0){ + p = ((u8int*)pcmp) + l16get(pcmp->length); + i = sigchecksum(p, l16get(pcmp->xlength)); + if(((i+pcmp->xchecksum) & 0xff) != 0){ + print("mp: extended table checksums to %#ux\n", i); + vunmap(pcmp, n); + return; + } + } 
+ + /* + * Parse the PCMP table and set up the datastructures + * for later interrupt enabling and application processor + * startup. + */ + mpparse(pcmp, maxmach); + lapicdump(); + ioapicdump(); +} --- /sys/src/fs/amd64/mmu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/mmu.c Tue Aug 27 16:10:58 2013 @@ -0,0 +1,447 @@ +#include "all.h" + +#include "amd64.h" + +static int dbgflg = 0; +#define DBG(...) do{if(dbgflg)print(__VA_ARGS__);}while(0) + +static void* +malloc(usize bytes) +{ + void *va; + + va = ialloc(bytes, 0); + if(va == nil) + panic("malloc: %#p\n", getcallerpc(&bytes)); + return va; +} +void* +mallocalign(usize bytes, uint align, uint, uint) +{ + void *va; + + va = ialloc(bytes, align); + if(va == nil) + panic("mallocalign: %#p\n", getcallerpc(&bytes)); + return va; +} + +typedef struct Page Page; +struct Page +{ +// Lock; + uintmem pa; /* Physical address in memory */ + uintptr va; /* Virtual address for user */ +// uint daddr; /* Disc address on swap */ +// int ref; /* Reference count */ +// uchar modref; /* Simulated modify/reference bits */ +// int color; /* Cache coloring */ +// char cachectl[MACHMAX]; /* Cache flushing control for mmuput */ +// Image *image; /* Associated text or swap image */ +// Page *next; /* Lru free list */ +// Page *prev; +// Page *hash; /* Image hash chains */ +// int pgszi; /* size index in m->pgsz[] */ +}; + +/* + * To do: + * PteNX; + * mmukmapsync grot for >1 processor; + * mmuptcopy (PteSHARED trick?); + */ + +#define PPN(x) ((x)&~(PGSZ-1)) + +/* + * set up a pat mappings. the system depends + * on the first 4 mappings not changing. 
+ */ +enum{ + Patmsr = 0x277, +}; + +static uchar pattab[8] = { + PATWB, + PATWT, + PATUCMINUS, + PATUC, + + PATWB, + PATWT, + PATUCMINUS, + PATUC, +}; + +static uint patflags[8] = { + 0, + PtePWT, + PtePCD, + PtePCD | PtePWT, + Pte4KPAT, + Pte4KPAT | PtePWT, + Pte4KPAT | PtePCD, + Pte4KPAT | PtePCD | PtePWT, +}; + +static void +setpatreg(int rno, int type) +{ + int i; + Mpl s; + u64int pat; + + s = splhi(); + pat = rdmsr(Patmsr); + pat &= ~(0xffull<machno == 0) + print("pat: %.16llux\n", pat); + for(i = 0; i < 64; i += 8) + pattab[i>>3] = pat>>i; +} + +static void +patinit(void) +{ + setpatreg(7, PATWC); +} + +/* adjust memory flags based on page table level (bits shift around) */ +static uint +memflagssz(uint flag, int ps) +{ + if(flag & Pte4KPAT && ps > 4*1024){ + flag &= ~Pte4KPAT; + flag |= Pte2MPAT | PtePS; + } + else if(ps > 4*1024) + flag |= PtePS; + return flag; +} + +void +dumpmmuwalk(uintmem addr) +{ + int l; + PTE *pte, *pml4; + + pml4 = UINT2PTR(m->pml4->va); + if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); +} + +static Lock mmukmaplock; +static Lock vmaplock; + +#define PML4X(v) PTLX((v), 3) +#define PDPX(v) PTLX((v), 2) +#define PDX(v) PTLX((v), 1) +#define PTX(v) PTLX((v), 0) + +int +mmukmapsync(uvlong va) +{ + USED(va); + + return 0; +} + +/* allocate page directories &c for vmaps */ +static uintmem +walkalloc(usize size) +{ + void *va; + + if((va = mallocalign(size, PTSZ, 0, 0)) != nil) + return PADDR(va); + panic("walkalloc: fail"); + return 0; +} + +uintptr +kseg2map(uintmem pa, uintmem len, uint basef) +{ + int i, l; 
+ uintptr va; + uintmem mem, nextmem; + PTE *pte, *pml4; + + DBG("kseg2map: %#P %#P size %P\n", pa, pa+len, len); + pml4 = UINT2PTR(m->pml4->va); + va = KSEG2+pa; + for(mem = pa; mem < pa+len; mem = nextmem){ + nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0]; + for(i = m->npgsz - 1; i >= 0; i--){ + if((mem & m->pgszmask[i]) != 0) + continue; + if(mem + PGLSZ(i) > pa+len) + continue; + if((l = mmuwalk(pml4, va, i, &pte, walkalloc)) != i){ + /* + * since we don't track vmap()s that overlap after + * rounding up to full pages, we have to be sloppy here + * and just say "close enough". should we check that the + * memory flags are the same? + */ + DBG("mmu: kseg2map: %#P: %d %d\n", mem, i, l); + /* i = l; ? */ + }else + *pte = mem|memflagssz(basef, PGLSZ(i)); + nextmem = mem + PGLSZ(i); + va += PGLSZ(i); + break; + } + } + return KSEG2+pa; +} + +void* +vmapflags(uintmem pa, usize size, uint flags) +{ + uintptr va; + usize o, sz; + + DBG("%d: vmapflags(%#P, %lud, %ux)\n", m->machno, pa, size, flags); + + /* Might be asking for less than a page. */ + o = pa & ((1<machno, pa, sz, flags, va, o); + return UINT2PTR(va + o); +} + + +void +vunmap(void* v, usize size) +{ + uintptr va; + + DBG("vunmap(%#p, %lud)\n", v, size); + + if(m->machno != 0) + panic("vunmap"); + + /* See the comments above in vmap. 
*/ + va = PTR2UINT(v); + if(va >= KZERO && va+size < KZERO+1ull*MiB) + return; + + /* missing implementation */ + DBG("vunmap(%#p, %lud)\n", v, size); +} + +void* +vmap(uintmem pa, usize size) +{ + DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa)); + return vmapflags(pa, size, PtePCD|PteRW); +} + +void* +vmappat(uintmem pa, usize size, uint pattype) +{ + int i; + + DBG("vmappat(%#p, %lud, %#ux) pc=%#p\n", pa, size, pattype, getcallerpc(&pa)); + for(i = 0; i < nelem(pattab); i++) + if(pattab[i] == pattype) + return vmapflags(pa, size, patflags[i]|PteRW); + return vmap(pa, size); +} + +/* descend from pml4 toward level for va, stopping early at a large page or (when alloc is nil) at a missing table; *ret gets the entry reached and its level is returned, or -1 with *ret untouched if alloc returns ~0 */ +int +mmuwalk(PTE* pml4, uintptr va, int level, PTE** ret, uintmem (*alloc)(usize)) +{ + int l; + uintmem pa; + PTE *pte; + Mpl pl; + + pl = splhi(); + DBG("mmuwalk%d: va %#p level %d\n", m->machno, va, level); + pte = &pml4[PTLX(va, 3)]; + for(l = 3; l >= 0; l--){ + if(l == level) + break; + if(!(*pte & PteP)){ + if(alloc == nil) + break; + pa = alloc(PTSZ); + if(pa == ~0){ splx(pl); return -1; } /* undo splhi before bailing out */ + memset(UINT2PTR(KADDR(pa)), 0, PTSZ); + *pte = pa|PteRW|PteP; + } + else if(*pte & PtePS) + break; + pte = UINT2PTR(KADDR(PPN(*pte))); + pte += PTLX(va, l-1); + } + *ret = pte; + splx(pl); + + return l; +} + +uintmem +mmuphysaddr(uintptr va) +{ + int l; + PTE *pte; + uintmem mask, pa; + + /* + * Given a VA, find the PA. + * This is probably not the right interface, + * but will do as an experiment. Usual + * question, should va be void* or uintptr?
+ */ + l = mmuwalk(UINT2PTR(m->pml4->va), va, 0, &pte, nil); + DBG("physaddr: va %#p l %d\n", va, l); + if(l < 0) + return ~0; + + mask = PGLSZ(l)-1; + pa = (*pte & ~mask) + (va & mask); + + DBG("physaddr: l %d va %#p pa %#P\n", l, va, pa); + + return pa; +} + +Page mach0pml4; + +static void +nxeon(void) +{ + Cpuidreg r; + + /* on intel64, cpuid 0x8::1 DX bit 20 means "Nxe bit in Efer allowed" */ + r = (Cpuidreg){0x80000001, 0, 0, 0}; + cpuid(&r); + if(r.dx & (1<<20)) /* the flag lives in DX, not CX; NOTE(review): assumes Cpuidreg names the field dx - confirm */ + wrmsr(Efer, rdmsr(Efer) | Nxe); +} + +static void +mapmem(PTE* pml4) +{ + int j, i, l; + uintptr va; + uintmem lo, hi, mem, nextmem; + Mbank *b; + PTE *pte; + + /* everything else mapped at kseg2 s.t. pa = va - KSEG2 */ + for(j = 1; j < mconf.nbank; j++){ + b = mconf.bank + j; + va = KSEG2+b->base; + + lo = b->base; + hi = b->limit; + print("mapmem: mem %#P %#P size %P\n", lo, hi, b->limit-b->base); + /* Convert a range into pages */ + for(mem = lo; mem < hi; mem = nextmem){ + nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0]; + /* Try large pages first */ + for(i = m->npgsz - 1; i >= 0; i--){ + if((mem & m->pgszmask[i]) != 0) + continue; + if(mem + PGLSZ(i) > hi) + continue; + if((l = mmuwalk(pml4, va, i, &pte, walkalloc)) < 0) + panic("mapmem: mmuwalk"); + *pte = mem|PteRW|PteP|PteG; + if(l > 0) + *pte |= PtePS; + nextmem = mem + PGLSZ(i); + va += PGLSZ(i); + break; + } + } + } +} + +void +apmmuinit(void) +{ + uchar *p; + + archmmu(); + /* + * NIX: KLUDGE: Has to go when each mach is using + * its own page table + */ + p = UINT2PTR(m->stack); + p += MACHSTKSZ; + + memmove(p, UINT2PTR(mach0pml4.va), PTSZ); + assert(sizeof(Page) <= sizeof(m->pml4kludge)); + m->pml4 = (Page*)m->pml4kludge; + m->pml4->va = PTR2UINT(p); + m->pml4->pa = PADDR(p); +// m->pml4->daddr = mach0pml4.daddr; /* # of user mappings in pml4 */ + + nxeon(); + patinit(); + putcr3(m->pml4->pa); +} + +void +mmuinit(void) +{ + Page *page; + + assert(m->machno == 0); + archmmu(); + DBG("mach%d: %#p pml4 %#p npgsz %d\n", m->machno, m,
m->pml4, m->npgsz); + + page = &mach0pml4; + page->pa = getcr3(); + page->va = PTR2UINT(KADDR(page->pa)); + + m->pml4 = page; + + nxeon(); + patinit(); + + if(dbgflg) + dumpmmuwalk(KZERO); + mmuphysaddr(PTR2UINT(end)); + mapmem(UINT2PTR(m->pml4->va)); +} --- /sys/src/fs/amd64/sipi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/sipi.c Tue Aug 27 16:10:58 2013 @@ -0,0 +1,98 @@ +#include "all.h" +#include "apic.h" +#include "sipi.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +#define SIPIHANDLER (KZERO+0x3000) + +void +sipi(void) +{ + Apic *apic; + Mach *mach; + int apicno, i, nproc; + u32int *sipiptr; + uintmem sipipa; + u8int *alloc, *p; + extern void squidboy(int); + + /* + * Move the startup code into place, + * must be aligned properly. + */ + sipipa = mmuphysaddr(SIPIHANDLER); + if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB)) + panic("sipi: invalid sipipa"); + sipiptr = UINT2PTR(SIPIHANDLER); + memmove(sipiptr, sipihandler, sizeof(sipihandler)); + DBG("sipiptr %#p sipipa %#P\n", sipiptr, sipipa); + + /* + * Notes: + * The Universal Startup Algorithm described in the MP Spec. 1.4. + * The data needed per-processor is the sum of the stack, page + * table pages, vsvm page and the Mach page. The layout is similar + * to that described in data.h for the bootstrap processor, but + * with any unused space elided. + */ + nproc = 0; + for(apicno = 0; apicno < Napic; apicno++){ + if((apic = lapiclookup(apicno)) == nil || apic->addr != 0 || apic->machno == 0) + continue; + nproc++; + if(nproc == MACHMAX){ + print("sipi: MACHMAX too small %d\n", nproc); + break; + } + + /* + * NOTE: for now, share the page tables with the + * bootstrap processor, until the lsipi code is worked out, + * so only the Mach and stack portions are used below. 
+ */ + alloc = ialloc(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096); + if(alloc == nil) + continue; + p = alloc+MACHSTKSZ; + + sipiptr[-1] = mmuphysaddr(PTR2UINT(p)); + DBG("p %#p sipiptr[-1] %#ux\n", p, sipiptr[-1]); + + p += 4*PTSZ+4*KiB; + + /* + * Committed. If the AP startup fails, can't safely + * release the resources, who knows what mischief + * the AP is up to. Perhaps should try to put it + * back into the INIT state? + */ + mach = (Mach*)p; + mach->machno = apic->machno; /* NOT one-to-one... */ + mach->splpc = PTR2UINT(squidboy); + mach->apicno = apicno; + mach->stack = PTR2UINT(alloc); + mach->vsvm = alloc+MACHSTKSZ+4*PTSZ; + + p = KADDR(0x467); + *p++ = sipipa; + *p++ = sipipa>>8; + *p++ = 0; + *p = 0; + + nvramwrite(0x0f, 0x0a); + lapicsipi(apicno, sipipa); + + for(i = 0; i < 5000; i += 5){ + if(mach->online) + break; + delay(5); + } + nvramwrite(0x0f, 0x00); + + DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n", + mach, sys->machptr[mach->machno], + apicno, mach->machno, mach->cpumhz); + } +} --- /sys/src/fs/amd64/l64sipi.s Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/l64sipi.s Tue Aug 27 16:10:59 2013 @@ -0,0 +1,235 @@ +/* + * Start-up request IPI handler. + * + * This code is executed on an application processor in response to receiving + * a Start-up IPI (SIPI) from another processor. + * This must be placed on a 4KiB boundary + * somewhere in the 1st MiB of conventional memory. However, + * due to some shortcuts below it's restricted further to within the 1st 64KiB. + * The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KiB; + * CPL and IP set to 0. + */ +#include "mem.h" +#include "amd64l.h" + +/* + * Some machine instructions not handled well by [68][al]. + * This is a messy piece of code, requiring instructions in real mode, + * protected mode (+long mode on amd64). 
The MODE psuedo-op of 6[al] handles + * the latter two OK, but 'MODE $16' is incomplete, e.g. it does + * not truncate operands appropriately, hence the ugly 'rMOVAX' macro. + * Fortunately, the only other instruction executed in real mode that + * could cause a problem (ORL) is encoded such that it will work OK. + */ +#define DELAY BYTE $0xeb; /* JMP .+2 */ \ + BYTE $0x00 +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s + +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +/* + * Real mode. Welcome to 1978. + * Load a basic GDT, turn on protected mode and make + * inter-segment jump to the protected mode code. + */ +MODE $16 + +TEXT _real<>(SB), 1, $-4 + rFARJMP16(0, _endofheader<>-KZERO(SB)) /* */ + +_startofheader: + NOP; NOP; NOP + QUAD $0xa5a5a5a5a5a5a5a5 + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) /* includes long mode */ + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + QUAD $0x0000800000000000 /* DS */ + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +TEXT _endofheader<>(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $Pe, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rMOVAX (SSEL(SiDS, SsTIGDT|SsRPL0)) /* */ + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + rFARJMP32(SSEL(SiCS, 
SsTIGDT|SsRPL0), _protected<>-KZERO(SB)) + +/* + * Protected mode. Welcome to 1982. + * Get the local APIC ID from the memory mapped APIC; + * load the PML4 with the shared page table address; + * make an identity map for the inter-segment jump below, + * using the stack space to hold a temporary PDP and PD; + * enable and activate long mode; + * make an inter-segment jump to the long mode code. + */ +MODE $32 + +/* + * Macros for accessing page table entries; must turn + * the C-style array-index macros into a page table byte + * offset. + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _protected<>(SB), 1, $-4 + MOVL $0xfee00000, BP /* apicbase */ + MOVL 0x20(BP), BP /* Id */ + SHRL $24, BP /* becomes RARG later */ +//MOVL $_real<>-KZERO(SB), CX +//MOVL BX, -4(CX) +//_spin: JMP _spin + + MOVL $(0x00100000+MACHSTKSZ), SI /* page table PML4 */ + + MOVL SI, AX + MOVL AX, CR3 /* load the mmu */ + + MOVL AX, DX + SUBL $MACHSTKSZ, DX /* PDP for identity map */ + ADDL $(PteRW|PteP), DX + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + + SUBL $MACHSTKSZ, AX /* PDP for identity map */ + ADDL $PTSZ, DX + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL $(PtePS|PteRW|PteP), DX + ADDL $PTSZ, AX /* PD for identity map */ + MOVL DX, PDO(0)(AX) /* PDE for identity 0-[24]MiB */ + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extentions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. 
Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack and call the + * C startup code in m->splpc. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* PML4-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + MOVQ DX, PML4O(0)(AX) /* zap identity map */ + + MOVQ SI, CR3 /* flush TLB */ +#ifndef UseOwnPageTables + /* + * SI still points to the base of the bootstrap + * processor page tables. + * Want to use that for clearing the identity map, + * but want to use the passed-in address for + * setting up the stack and Mach. 
+ */ + MOVQ $_real<>(SB), AX + MOVL -4(AX), SI /* PML4 */ + MOVLQZX SI, SI /* PML4-KZERO */ +#endif + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + ADDQ $(4*PTSZ+4*KiB), AX /* PML4+PDP+PD+PT+vsvm */ + MOVQ AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX RARG, RARG /* APIC ID */ + PUSHQ RARG /* apicno */ + + MOVQ 8(RMACH), AX /* m->splpc */ + CALL* AX /* CALL squidboy(SB) */ + +_ndnr: + JMP _ndnr --- /sys/src/fs/amd64/apic.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/apic.h Tue Aug 27 16:10:59 2013 @@ -0,0 +1,98 @@ +/* + * There are 2 flavours of APIC, Local APIC and IOAPIC, + * Each I/O APIC has a unique physical address, + * Local APICs are all at the same physical address as they can only be + * accessed by the local CPU. APIC ids are unique to the + * APIC type, so an IOAPIC and APIC both with id 0 is ok. + */ +typedef struct Ioapic Ioapic; +typedef struct Lapic Lapic; +typedef struct Apic Apic; + +struct Ioapic { + Lock; /* register access */ + u32int* addr; /* register base */ + uintmem paddr; /* register base */ + int nrdt; /* size of RDT */ + int ibase; /* global interrupt base */ +}; + +struct Lapic { + int machno; /* APIC */ + + u32int lvt[7]; + int nlvt; + int ver; + + vlong hz; /* APIC Timer frequency */ + vlong max; + vlong min; + vlong div; +}; + +struct Apic { + int useable; /* en */ + Ioapic; + Lapic; +}; + +enum { + Nbus = 256, /* must be 256 */ + Napic = 254, /* xAPIC architectural limit */ + Nrdt = 128, +}; + +/* + * Common bits for + * IOAPIC Redirection Table Entry (RDT); + * APIC Local Vector Table Entry (LVT); + * APIC Interrupt Command Register (ICR). 
+ * [10:8] Message Type + * [11] Destination Mode (RW) + * [12] Delivery Status (RO) + * [13] Interrupt Input Pin Polarity (RW) + * [14] Remote IRR (RO) + * [15] Trigger Mode (RW) + * [16] Interrupt Mask + */ +enum { + MTf = 0x00000000, /* Fixed */ + MTlp = 0x00000100, /* Lowest Priority */ + MTsmi = 0x00000200, /* SMI */ + MTrr = 0x00000300, /* Remote Read */ + MTnmi = 0x00000400, /* NMI */ + MTir = 0x00000500, /* INIT/RESET */ + MTsipi = 0x00000600, /* Startup IPI */ + MTei = 0x00000700, /* ExtINT */ + + Pm = 0x00000000, /* Physical Mode */ + Lm = 0x00000800, /* Logical Mode */ + + Ds = 0x00001000, /* Delivery Status */ + IPhigh = 0x00000000, /* IIPP High */ + IPlow = 0x00002000, /* IIPP Low */ + Rirr = 0x00004000, /* Remote IRR */ + TMedge = 0x00000000, /* Trigger Mode Edge */ + TMlevel = 0x00008000, /* Trigger Mode Level */ + Im = 0x00010000, /* Interrupt Mask */ +}; + +void apictimerenab(void); +int gsitoapicid(int, uint*); +void ioapicdump(void); +Apic* ioapicinit(int, int, uintmem); +void ioapicintrinit(int, int, int, int, int, u32int); +Apic* ioapiclookup(uint); +void ioapiconline(void); +void lapicdump(void); +int lapiceoi(int); +void lapicinit(int, uintmem, int); +void lapicipi(int); +int lapicisr(int); +Apic* lapiclookup(uint); +int lapiconline(void); +void lapicpri(int); +void lapicsipi(int, uintmem); + +int pcimsienable(Pcidev*, uvlong); +int pcimsimask(Pcidev*, int); --- /sys/src/fs/amd64/lapic.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/lapic.c Tue Aug 27 16:11:00 2013 @@ -0,0 +1,416 @@ +#include "all.h" + +#include "apic.h" +#include "io.h" +#include "ureg.h" + +#define DBGFLG 0 +#define DBG(...) 
do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +enum { /* Local APIC registers */ + Id = 0x0020, /* Identification */ + Ver = 0x0030, /* Version */ + Tp = 0x0080, /* Task Priority */ + Ap = 0x0090, /* Arbitration Priority */ + Pp = 0x00a0, /* Processor Priority */ + Eoi = 0x00b0, /* EOI */ + Ld = 0x00d0, /* Logical Destination */ + Df = 0x00e0, /* Destination Format */ + Siv = 0x00f0, /* Spurious Interrupt Vector */ + Is = 0x0100, /* Interrupt Status (8) */ + Tmode = 0x0180, /* Trigger Mode (8) */ + Ir = 0x0200, /* Interrupt Request (8) */ + Es = 0x0280, /* Error Status */ + Iclo = 0x0300, /* Interrupt Command */ + Ichi = 0x0310, /* Interrupt Command [63:32] */ + Lvt0 = 0x0320, /* Local Vector Table 0 */ + Lvt5 = 0x0330, /* Local Vector Table 5 */ + Lvt4 = 0x0340, /* Local Vector Table 4 */ + Lvt1 = 0x0350, /* Local Vector Table 1 */ + Lvt2 = 0x0360, /* Local Vector Table 2 */ + Lvt3 = 0x0370, /* Local Vector Table 3 */ + Tic = 0x0380, /* Timer Initial Count */ + Tcc = 0x0390, /* Timer Current Count */ + Tdc = 0x03e0, /* Timer Divide Configuration */ + + Tlvt = Lvt0, /* Timer */ + Lint0 = Lvt1, /* Local Interrupt 0 */ + Lint1 = Lvt2, /* Local Interrupt 1 */ + Elvt = Lvt3, /* Error */ + Pclvt = Lvt4, /* Performance Counter */ + Tslvt = Lvt5, /* Thermal Sensor */ +}; + +enum { /* Siv */ + Swen = 0x00000100, /* Software Enable */ + Fdis = 0x00000200, /* Focus Disable */ +}; + +enum { /* Iclo */ + Lassert = 0x00004000, /* Assert level */ + + DSnone = 0x00000000, /* Use Destination Field */ + DSself = 0x00040000, /* Self is only destination */ + DSallinc = 0x00080000, /* All including self */ + DSallexc = 0x000c0000, /* All Excluding self */ +}; + +enum { /* Tlvt */ + Periodic = 0x00020000, /* Periodic Timer Mode */ +}; + +enum { /* Tdc */ + DivX2 = 0x00000000, /* Divide by 2 */ + DivX4 = 0x00000001, /* Divide by 4 */ + DivX8 = 0x00000002, /* Divide by 8 */ + DivX16 = 0x00000003, /* Divide by 16 */ + DivX32 = 0x00000008, /* Divide by 32 */ + DivX64 = 0x00000009, /* Divide 
by 64 */ + DivX128 = 0x0000000a, /* Divide by 128 */ + DivX1 = 0x0000000b, /* Divide by 1 */ +}; + +static u32int* lapicbase; +static int lapmachno = 1; + +static Apic xlapic[Napic]; + +Apic* +lapiclookup(uint id) +{ + Apic *a; + + if(id >= nelem(xlapic)) /* xlapic[nelem(xlapic)] is one past the end */ + return nil; + a = xlapic + id; + if(a->useable) + return a; + return nil; +} + +static u32int +lapicrget(int r) +{ + return lapicbase[r/4]; +} + +static void +lapicrput(int r, u32int data) +{ + lapicbase[r/4] = data; +} + +int +lapiceoi(int vecno) +{ + lapicrput(Eoi, 0); + return vecno; +} + +int +lapicisr(int vecno) +{ + int isr; + + isr = lapicrget(Is + (vecno/32)*16); + + return isr & (1<<(vecno%32)); +} + +static char* +lapicprint(char *p, char *e, Lapic *a, int i) +{ + char *s; + + s = "proc"; + p = seprint(p, e, "%-8s ", s); + p = seprint(p, e, "%8ux ", i); +// p = seprint(p, e, "%.8ux ", a->dest); +// p = seprint(p, e, "%.8ux ", a->mask); +// p = seprint(p, e, "%c", a->flags & PcmpBP? 'b': ' '); +// p = seprint(p, e, "%c ", a->flags & PcmpEN? 'e': ' '); +// p = seprint(p, e, "%8ux %8ux", a->lintr[0], a->lintr[1]); + p = seprint(p, e, "%12d\n", a->machno); + return p; +} + +void +lapicinit(int lapicno, uintmem pa, int isbp) +{ + Apic *apic; + + /* + * Mark the LAPIC useable if it has a good ID + * and the registers can be mapped. + * The LAPIC Extended Broadcast and ID bits in the HyperTransport + * Transaction Control register determine whether 4 or 8 bits + * are used for the LAPIC ID. There is also xLAPIC and x2LAPIC + * to be dealt with sometime. 
+ */ + DBG("lapicinit: lapicno %d pa %#P isbp %d caller %#p\n", lapicno, pa, isbp, getcallerpc(&lapicno)); + + if(lapicno >= Napic){ + panic("lapicinit%d: out of range", lapicno); + return; + } + if((apic = &xlapic[lapicno])->useable){ + print("lapicinit%d: already initialised\n", lapicno); + return; + } + if(lapicbase == nil){ +// adrmapck(pa, 1024, Aapic, Mfree); + if((lapicbase = vmap(pa, 1024)) == nil){ + panic("lapicinit%d: can't map lapicbase %#P", lapicno, pa); + return; + } + DBG("lapicinit%d: lapicbase %#P -> %#p\n", lapicno, pa, lapicbase); + } + apic->useable = 1; + + /* + * Assign a machno to the processor associated with this + * LAPIC, it may not be an identity map. + * Machno 0 is always the bootstrap processor. + */ + if(isbp){ + apic->machno = 0; + m->apicno = lapicno; + } + else + apic->machno = lapmachno++; +} + +static void +lapicdump0(Apic *apic, int i) +{ + if(!apic->useable || apic->addr != 0) + return; + DBG("lapic%d: machno %d lint0 %#8.8ux lint1 %#8.8ux\n", + i, apic->machno, apic->lvt[0], apic->lvt[1]); + DBG(" tslvt %#8.8ux pclvt %#8.8ux elvt %#8.8ux\n", + lapicrget(Tslvt), lapicrget(Pclvt), lapicrget(Elvt)); + DBG(" tlvt %#8.8ux lint0 %#8.8ux lint1 %#8.8ux siv %#8.8ux\n", + lapicrget(Tlvt), lapicrget(Lint0), + lapicrget(Lint1), lapicrget(Siv)); +} + +void +lapicdump(void) +{ + int i; + + if(!DBGFLG) + return; + + DBG("lapicbase %#p lapmachno %d\n", lapicbase, lapmachno); + for(i = 0; i < Napic; i++) + lapicdump0(xlapic + i, i); +} + +static void +apictimer(Ureg* ureg, void*) +{ + clock(0, ureg->ip); +} + +int +lapiconline(void) +{ + Apic *apic; + u64int tsc; + u32int dfr, ver; + int apicno, nlvt; + + if(lapicbase == nil) + panic("lapiconline: no lapic base"); + + if((apicno = ((lapicrget(Id)>>24) & 0xff)) >= Napic) + panic("lapic: id too large %d", apicno); + if(apicno != m->apicno){ + panic("lapic: %d != %d", m->apicno, apicno); + dfr = lapicrget(Id) & ~(0xff<<24); + dfr |= m->apicno<<24; + lapicrput(Id, dfr); + apicno = m->apicno; + } 
+ apic = &xlapic[apicno]; + if(!apic->useable || apic->addr != nil) + panic("lapiconline: lapic%d: useable %d addr %#p", + apicno, apic->useable, apic->addr); + + /* + * Things that can only be done when on the processor + * owning the APIC, apicinit above runs on the bootstrap + * processor. + */ + ver = lapicrget(Ver); + nlvt = ((ver>>16) & 0xff) + 1; + if(nlvt > nelem(apic->lvt)){ + print("lapiconline%d: nlvt %d > max (%d)\n", + apicno, nlvt, nelem(apic->lvt)); + nlvt = nelem(apic->lvt); + } + apic->nlvt = nlvt; + apic->ver = ver & 0xff; + + /* + * These don't really matter in Physical mode; + * set the defaults anyway. + */ +// if(memcmp(m->cpuinfo, "AuthenticAMD", 12) == 0) +// dfr = 0xf0000000; +// else + dfr = 0xffffffff; + lapicrput(Df, dfr); + lapicrput(Ld, 0x00000000); + + /* + * Disable interrupts until ready by setting the Task Priority + * register to 0xff. + */ + lapicrput(Tp, 0xff); + + /* + * Software-enable the APIC in the Spurious Interrupt Vector + * register and set the vector number. The vector number must have + * bits 3-0 0x0f unless the Extended Spurious Vector Enable bit + * is set in the HyperTransport Transaction Control register. + */ + lapicrput(Siv, Swen|IdtSPURIOUS); + + /* + * Acknowledge any outstanding interrupts. + */ + lapicrput(Eoi, 0); + + /* + * Use the TSC to determine the lapic timer frequency. + * It might be possible to snarf this from a chipset + * register instead. 
+ */ + lapicrput(Tdc, DivX1); + lapicrput(Tlvt, Im); + tsc = rdtsc() + m->cpuhz/10; + lapicrput(Tic, 0xffffffff); + + while(rdtsc() < tsc) + ; + + apic->hz = (0xffffffff-lapicrget(Tcc))*10; + apic->max = apic->hz/HZ; + apic->min = apic->hz/(100*HZ); + apic->div = ((m->cpuhz/apic->max)+HZ/2)/HZ; + + if(m->machno == 0 || DBGFLG){ + print("lapic%d: hz %lld max %lld min %lld div %lld\n", apicno, + apic->hz, apic->max, apic->min, apic->div); + } + + /* + * Mask interrupts on Performance Counter overflow and + * Thermal Sensor if implemented, and on Lintr0 (Legacy INTR), + * and Lintr1 (Legacy NMI). + * Clear any Error Status (write followed by read) and enable + * the Error interrupt. + */ + switch(apic->nlvt){ + case 7: + case 6: + lapicrput(Tslvt, Im); + /*FALLTHROUGH*/ + case 5: + lapicrput(Pclvt, Im); + /*FALLTHROUGH*/ + default: + break; + } + lapicrput(Lint1, apic->lvt[1]|Im|IdtLINT1); + lapicrput(Lint0, apic->lvt[0]|Im|IdtLINT0); + + lapicrput(Es, 0); + lapicrget(Es); + lapicrput(Elvt, IdtERROR); + + /* + * Reload the timer to de-synchronise the processors, + * then lower the task priority to allow interrupts to be + * accepted by the APIC. + */ + microdelay((TK2MS(1)*1000/lapmachno) * m->machno); + lapicrput(Tic, apic->max); + + if(apic->machno == 0) + intrenable(IdtTIMER, apictimer, 0, -1, "APIC timer"); + lapicrput(Tlvt, Periodic|IrqTIMER); + if(m->machno == 0) + lapicrput(Tp, 0); + return 1; +} + +void +lapictimerset(uvlong next) +{ + Mpl pl; + Apic *apic; + vlong period; + + apic = &xlapic[(lapicrget(Id)>>24) & 0xff]; + + pl = splhi(); + lock(&m->apictimerlock); + + period = apic->max; + if(next != 0){ + period = next - rdtsc(); + period /= apic->div; + + if(period < apic->min) + period = apic->min; + else if(period > apic->max - apic->min) + period = apic->max; + } + lapicrput(Tic, period); + + unlock(&m->apictimerlock); + splx(pl); +} + +void +lapicsipi(int lapicno, uintmem pa) +{ + int i; + u32int crhi, crlo; + + /* + * SIPI - Start-up IPI. 
+ * To do: checks on lapic validity. + */ + crhi = lapicno<<24; + lapicrput(Ichi, crhi); + lapicrput(Iclo, DSnone|TMlevel|Lassert|MTir); + microdelay(200); + lapicrput(Iclo, DSnone|TMlevel|MTir); + delay(10); + + crlo = DSnone|TMedge|MTsipi|((u32int)pa/(4*KiB)); + for(i = 0; i < 2; i++){ + lapicrput(Ichi, crhi); + lapicrput(Iclo, crlo); + microdelay(200); + } +} + +void +lapicipi(int lapicno) +{ + lapicrput(Ichi, lapicno<<24); + lapicrput(Iclo, DSnone|TMedge|Lassert|MTf|IdtIPI); + while(lapicrget(Iclo) & Ds) + ; +} + +void +lapicpri(int pri) +{ + lapicrput(Tp, pri); +} --- /sys/src/fs/amd64/ioapic.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/ioapic.c Tue Aug 27 16:11:00 2013 @@ -0,0 +1,549 @@ +#include "all.h" + +#include "apic.h" +#include "io.h" +//#include "adr.h" + +#define DBGFLG 0 +#define DBG(...) do{if(DBGFLG)print(__VA_ARGS__);}while(0) + +typedef struct Rbus Rbus; +typedef struct Rdt Rdt; + +struct Rbus { + Rbus *next; + int bustype; + int devno; + Rdt *rdt; +}; + +struct Rdt { + Apic *apic; + int intin; + u32int lo; + + int ref; /* could map to multiple busses */ + int enabled; /* times enabled */ +}; + +enum { /* IOAPIC registers */ + Ioregsel = 0x00, /* indirect register address */ + Iowin = 0x04, /* indirect register data */ + Ioipa = 0x08, /* IRQ Pin Assertion */ + Ioeoi = 0x10, /* EOI */ + + Ioapicid = 0x00, /* Identification */ + Ioapicver = 0x01, /* Version */ + Ioapicarb = 0x02, /* Arbitration */ + Ioabcfg = 0x03, /* Boot Coniguration */ + Ioredtbl = 0x10, /* Redirection Table */ +}; + +static Rdt rdtarray[Nrdt]; +static int nrdtarray; +static Rbus* rdtbus[Nbus]; +static Rdt* rdtvecno[IdtMAX+1]; + +static Lock idtnolock; +static int idtno = IdtIOAPIC; + +static Apic xioapic[Napic]; +static int isabusno = -1; + +/* BOTCH: no need for this concept; we've got the bustype */ +static void +ioapicisabus(int busno) +{ + if(isabusno != -1){ + if(busno == isabusno) + return; + print("ioapic: isabus redefined: %d ↛ %d\n", isabusno, busno); +////// 
return; + } + print("ioapic: isa busno %d\n", busno); + isabusno = busno; +} + +Apic* +ioapiclookup(uint id) +{ + Apic *a; + + if(id >= nelem(xioapic)) /* xioapic[nelem(xioapic)] is one past the end */ + return nil; + a = xioapic + id; + if(a->useable) + return a; + return nil; +} + +int +gsitoapicid(int gsi, uint *intin) +{ + int i; + Apic *a; + + for(i=0; i<Napic; i++){ /* find the useable ioapic whose [ibase, ibase+nrdt) covers gsi */ + a = xioapic + i; + if(!a->useable) + continue; + if(gsi >= a->ibase && gsi < a->ibase+a->nrdt){ + if(intin != nil) + *intin = gsi - a->ibase; + return a - xioapic; + } + } + print("gsitoapicid: no ioapic found for gsi %d\n", gsi); + return -1; +} + +static void +rtblget(Apic* apic, int sel, u32int* hi, u32int* lo) +{ + sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + *hi = apic->addr[Iowin]; + apic->addr[Ioregsel] = sel; + *lo = apic->addr[Iowin]; +} + +static void +rtblput(Apic* apic, int sel, u32int hi, u32int lo) +{ + sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + apic->addr[Iowin] = hi; + apic->addr[Ioregsel] = sel; + apic->addr[Iowin] = lo; +} + +Rdt* +rdtlookup(Apic *apic, int intin) +{ + int i; + Rdt *r; + + for(i = 0; i < nrdtarray; i++){ + r = rdtarray + i; + if(apic == r->apic && intin == r->intin) + return r; + } + return nil; +} + +void +ioapicintrinit(int bustype, int busno, int apicno, int intin, int devno, u32int lo) +{ + Rbus *rbus; + Rdt *rdt; + Apic *apic; + + if(busno >= Nbus || apicno >= Napic || nrdtarray >= Nrdt) + return; + + if(bustype == BusISA) + ioapicisabus(busno); + + apic = &xioapic[apicno]; + if(!apic->useable || intin >= apic->nrdt) + panic("ioapic: intrinit: usable %d nrdt %d: bus %d apic %d intin %d dev %d lo %.8ux\n", + apic->useable, apic->nrdt, busno, apicno, intin, devno, lo); + + rdt = rdtlookup(apic, intin); + if(rdt == nil){ + if(nrdtarray == nelem(rdtarray)){ + print("ioapic: intrinit: rdtarray too small\n"); + return; + } + rdt = &rdtarray[nrdtarray++]; + rdt->apic = apic; + rdt->intin = intin; + rdt->lo = lo; + }else{ + if(lo != rdt->lo){ + print("mutiple irq botch bus %d %d/%d/%d lo %.8ux vs %.8ux\n", + busno, apicno, 
intin, devno, lo, rdt->lo); + return; + } + DBG("dup rdt %d %d %d %d %.8ux\n", busno, apicno, intin, devno, lo); + } + rdt->ref++; + rbus = ialloc(sizeof *rbus, 0); + rbus->rdt = rdt; + rbus->bustype = bustype; + rbus->devno = devno; + rbus->next = rdtbus[busno]; + rdtbus[busno] = rbus; +} + +/* + * deal with ioapics at the same physical address. seen on + * certain supermicro atom systems. the hope is that only + * one will be used, and it will be the second one initialized. + * (the pc kernel ignores this issue.) it could be that mp and + * acpi have different numbering? + */ +static Apic* +dupaddr(uintmem pa) +{ + int i; + Apic *p; + + for(i = 0; i < nelem(xioapic); i++){ + p = xioapic + i; + if(p->paddr == pa) + return p; + } + return nil; +} + +Apic* +ioapicinit(int id, int ibase, uintmem pa) +{ + Apic *apic, *p; + static int base; + + /* + * Mark the IOAPIC useable if it has a good ID + * and the registers can be mapped. + */ + if(id >= Napic) + return nil; + if((apic = xioapic+id)->useable) + return apic; + + if((p = dupaddr(pa)) != nil){ + print("ioapic%d: same pa as apic%ld\n", id, p-xioapic); + if(ibase != -1) + return nil; /* mp irqs reference mp apic#s */ + apic->addr = p->addr; + } + else{ +// adrmapck(pa, 1024, Aapic, Mfree); /* not in adr? */ + if((apic->addr = vmap(pa, 1024)) == nil){ + print("ioapic%d: can't vmap %#P\n", id, pa); + return nil; + } + } + apic->useable = 1; + apic->paddr = pa; + + /* + * Initialise the I/O APIC. + * The MultiProcessor Specification says it is the + * responsibility of the O/S to set the APIC ID. 
+ */ + lock(apic); + apic->addr[Ioregsel] = Ioapicver; + apic->nrdt = (apic->addr[Iowin]>>16 & 0xff) + 1; + if(ibase != -1) + apic->ibase = ibase; + else{ + apic->ibase = base; + base += apic->nrdt; + } + apic->addr[Ioregsel] = Ioapicid; + apic->addr[Iowin] = id<<24; + unlock(apic); + + return apic; +} + +static void +·ioapicdump(void) +{ + int i, n; + Rbus *rbus; + Rdt *rdt; + Apic *apic; + u32int hi, lo; + + for(i = 0; i < Napic; i++){ + apic = &xioapic[i]; + if(!apic->useable || apic->addr == 0) + continue; + print("ioapic %d addr %#p nrdt %d ibase %d\n", + i, apic->addr, apic->nrdt, apic->ibase); + for(n = 0; n < apic->nrdt; n++){ + lock(apic); + rtblget(apic, n, &hi, &lo); + unlock(apic); + print(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); + prflush(); + } + } + for(i = 0; i < Nbus; i++){ + if((rbus = rdtbus[i]) == nil) + continue; + print("iointr bus %d:\n", i); + for(; rbus != nil; rbus = rbus->next){ + rdt = rbus->rdt; + print(" apic %ld devno %#ux (%d %d) intin %d lo %#ux ref %d\n", + rdt->apic-xioapic, rbus->devno, rbus->devno>>2, + rbus->devno & 0x03, rdt->intin, rdt->lo, rdt->ref); + prflush(); + } + } +} + +void +ioapicdump(void) +{ + if(DBGFLG) + ·ioapicdump(); +} + +void +cmd_ioapicdump(int, char**) +{ + ·ioapicdump(); +} + +void +ioapiconline(void) +{ + int i; + Apic *apic; + + for(apic = xioapic; apic < &xioapic[Napic]; apic++){ + if(!apic->useable || apic->addr == nil) + continue; + for(i = 0; i < apic->nrdt; i++){ + lock(apic); + rtblput(apic, i, 0, Im); + unlock(apic); + } + } + cmd_install("ioapic", "-- ioapic dump", cmd_ioapicdump); +} + +static int +ioapicintrdd(u32int* hi, u32int* lo) +{ + Apic *lapic; + Mach *mach; + static int i; + + /* + * Set delivery mode (lo) and destination field (hi) + * + * Currently, assign each interrupt to a different CPU + * using physical mode delivery. Using the topology + * (packages/cores/threads) could be helpful. 
+ */ + for(;; i = (i+1) % Napic){ + if((lapic = lapiclookup(i)) == nil) + continue; + if((mach = sys->machptr[lapic->machno]) == nil) + continue; + if(mach->online) + break; + } + *hi = i++<<24; + *lo |= Pm|MTf; + return mach->machno; +} + +int +nextvec(void) +{ + uint vecno; + + lock(&idtnolock); + vecno = idtno; + idtno = (idtno+8) % IdtMAX; + if(idtno < IdtIOAPIC) + idtno += IdtIOAPIC; + unlock(&idtnolock); + + return vecno; +} + +static int +msimask(Vkey *v, int mask) +{ + Pcidev *p; + + p = pcimatchtbdf(v->tbdf); + if(p == nil) + return -1; + return pcimsimask(p, mask); +} + +static int +intrenablemsi(Vctl* v, Pcidev *p) +{ + uint vno, lo, hi; + uvlong msivec; + + vno = nextvec(); + + lo = IPlow | TMedge | vno; + v->affinity = ioapicintrdd(&hi, &lo); + + if(lo & Lm) + lo |= MTlp; + + msivec = (uvlong)hi<<32 | lo; + if(pcimsienable(p, msivec) == -1) + return -1; + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vno; + v->type = "msi"; + v->mask = msimask; + + DBG("msiirq: %τ: enabling %.16llux %s irq %d vno %d\n", p->tbdf, msivec, v->name, v->irq, vno); + return vno; +} + +int +disablemsi(Vctl*, Pcidev *p) +{ + if(p == nil) + return -1; + return pcimsimask(p, 1); +} + +int +ioapicintrenable(Vctl* v) +{ + Rbus *rbus; + Rdt *rdt; + u32int hi, lo; + int bustype, busno, devno, vecno; + + if(v->tbdf == BUSUNKNOWN){ + if(v->irq >= IrqLINT0 && v->irq <= MaxIrqLAPIC){ + if(v->irq != IrqSPURIOUS) + v->isr = lapiceoi; + v->type = "lapic"; + return v->irq; + } + else{ + /* + * Legacy ISA. + * Make a busno and devno using the + * ISA bus number and the irq. + */ + if(isabusno == -1) + panic("no ISA bus allocated"); + busno = isabusno; + devno = v->irq; + bustype = BusISA; + } + } + else if((bustype = BUSTYPE(v->tbdf)) == BusPCI){ + /* + * PCI. + * Make a devno from BUSDNO(tbdf) and pcidev->intp. 
+ */ + Pcidev *pcidev; + + busno = BUSBNO(v->tbdf); + if((pcidev = pcimatchtbdf(v->tbdf)) == nil) + panic("no PCI dev for tbdf %τ", v->tbdf); + if((vecno = intrenablemsi(v, pcidev)) != -1) + return vecno; + disablemsi(v, pcidev); + if((devno = pcicfgr8(pcidev, PciINTP)) == 0) + panic("no INTP for tbdf %τ", v->tbdf); + devno = BUSDNO(v->tbdf)<<2|(devno-1); + DBG("ioapicintrenable: tbdf %τ busno %d devno %d\n", + v->tbdf, busno, devno); + } + else{ + SET(busno, devno); + panic("unknown tbdf %τ", v->tbdf); + } + + rdt = nil; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno && rbus->bustype == bustype){ + rdt = rbus->rdt; + break; + } + if(rdt == nil){ + /* + * First crack in the smooth exterior of the new code: + * some BIOS make an MPS table where the PCI devices are + * just defaulted to ISA. + * Rewrite this to be cleaner. + */ + if((busno = isabusno) == -1) + return -1; + devno = v->irq<<2; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno){ + rdt = rbus->rdt; + break; + } + DBG("isa: tbdf %τ busno %d devno %d %#p\n", + v->tbdf, busno, devno, rdt); + } + if(rdt == nil) + return -1; + + /* + * Assume this is a low-frequency event so just lock + * the whole IOAPIC to initialise the RDT entry + * rather than putting a Lock in each entry. 
+ */ + lock(rdt->apic); + DBG("%τ: %ld/%d/%d (%d)\n", v->tbdf, rdt->apic - xioapic, rbus->devno, rdt->intin, devno); + if((rdt->lo & 0xff) == 0){ + vecno = nextvec(); + rdt->lo |= vecno; + rdtvecno[vecno] = rdt; + }else + DBG("%τ: mutiple irq bus %d dev %d\n", v->tbdf, busno, devno); + + rdt->enabled++; + lo = (rdt->lo & ~Im); + v->affinity = ioapicintrdd(&hi, &lo); + rtblput(rdt->apic, rdt->intin, hi, lo); + vecno = lo & 0xff; + unlock(rdt->apic); + + DBG("busno %d devno %d hi %#.8ux lo %#.8ux vecno %d\n", + busno, devno, hi, lo, vecno); + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vecno; + v->type = "ioapic"; + + return vecno; +} + +int +ioapicintrdisable(int vecno) +{ + Rdt *rdt; + + /* + * FOV. Oh dear. This isn't very good. + * Fortunately rdtvecno[vecno] is static + * once assigned. + * Must do better. + * + * What about any pending interrupts? + */ + if(vecno < 0 || vecno > MaxVectorAPIC){ + panic("ioapicintrdisable: vecno %d out of range", vecno); + return -1; + } + if((rdt = rdtvecno[vecno]) == nil){ + panic("ioapicintrdisable: vecno %d has no rdt", vecno); + return -1; + } + + lock(rdt->apic); + rdt->enabled--; + if(rdt->enabled == 0) + rtblput(rdt->apic, rdt->intin, 0, rdt->lo); + unlock(rdt->apic); + + return 0; +} --- /sys/src/fs/amd64/msi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/msi.c Tue Aug 27 16:11:01 2013 @@ -0,0 +1,113 @@ +#include "all.h" +#include "io.h" +#include "apic.h" + +enum { + Dpcicap = 1<<0, + Dmsicap = 1<<1, + Dvec = 1<<2, + Debug = 0, +}; + +enum { + /* address */ + Msiabase = 0xfee00000u, + Msiadest = 1<<12, /* same as 63:56 of apic vector */ + Msiaedest = 1<<4, /* same as 55:48 of apic vector */ + Msialowpri = 1<<3, /* redirection hint */ + Msialogical = 1<<2, + + /* data */ + Msidlevel = 1<<15, + Msidassert = 1<<14, + Msidlogical = 1<<11, + Msidmode = 1<<8, /* 3 bits; delivery mode */ + Msidvector = 0xff<<0, +}; + +enum{ + /* msi capabilities */ + Vmask = 1<<8, + Cap64 = 1<<7, + Mmesgmsk = 7<<4, + Mmcap = 7<<1, + 
Msienable = 1<<0, +}; + +static int +msicap(Pcidev *p) +{ + int c; + + c = pcicap(p, PciCapMSI); + if(c == -1) + return 0; + return c; +} + +static int +blacklist(Pcidev *p) +{ + switch(p->vid<<16 | p->did){ + case 0x11ab<<16 | 0x6485: + return -1; + } + return 0; +} + +int +pcimsienable(Pcidev *p, uvlong vec) +{ + char *s; + uint c, f, d, datao, lopri, dmode, logical; + + c = msicap(p); + if(c == 0) + return -1; + + f = pcicfgr16(p, c + 2) & ~Mmesgmsk; + + if(blacklist(p) != 0) + return -1; + datao = 8; + d = vec>>48; + lopri = (vec & 0x700) == MTlp; + logical = (vec & Lm) != 0; + pcicfgw32(p, c + 4, Msiabase | Msiaedest * d + | Msialowpri * lopri | Msialogical * logical); + if(f & Cap64){ + datao += 4; + pcicfgw32(p, c + 8, 0); + } + dmode = (vec >> 8) & 7; + pcicfgw16(p, c + datao, Msidassert | Msidlogical * logical + | Msidmode * dmode | (uint)vec & 0xff); + if(f & Vmask) + pcicfgw32(p, c + datao + 4, 0); + + /* leave vectors configured but disabled for debugging */ + if((s = getconf("*nomsi")) != nil && strtoul(s, nil, 0) != 0) + return -1; + + pcicfgw16(p, c + 2, f); + return 0; +} + +int +pcimsimask(Pcidev *p, int mask) +{ + uint c, f; + + c = msicap(p); + if(c == 0) + return -1; + f = pcicfgr16(p, c + 2) & ~Msienable; + if(mask){ + pcicfgw16(p, c + 2, f & ~Msienable); +// pciclrbme(p); cheeze + }else{ + pcisetbme(p); + pcicfgw16(p, c + 2, f | Msienable); + } + return 0; +} --- /sys/src/fs/amd64/squid.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/squid.c Tue Aug 27 16:11:01 2013 @@ -0,0 +1,52 @@ +#include "all.h" +#include "ureg.h" +#include "io.h" +#include "apic.h" + +#define DBG(...) 
+ +void +squidboy(int apicno) +{ + sys->machptr[m->machno] = m; +// setmachsched(m); + + m->perf.period = 1; + m->cpuhz = sys->machptr[0]->cpuhz; + m->cpumhz = sys->machptr[0]->cpumhz; + + DBG("Hello Squidboy %d %d\n", apicno, m->machno); + + vsvminit(MACHSTKSZ); + apmmuinit(); + if(!lapiconline()) + ndnr(); + fpuinit(); + m->splpc = 0; + m->online = 1; + + /* + * CAUTION: no time sync done, etc. + */ + DBG("Wait for the thunderbirds!\n"); + while(!active.thunderbirdsarego) + ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + + DBG("cpu%d color %d tsc %lld\n", + m->machno, machcolor(m->machno), m->rdtsc); + + /* + * Enable the timer interrupt. + */ +// apictimerenab(); + lapicpri(0); + +//// timersinit(); +//// adec(&active.nbooting); +//// ainc(&active.nonline); + + schedinit(); + panic("cpu%d: apic%d: squidboy returns", m->machno, apicno); +} --- /sys/src/fs/amd64/archdat.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/archdat.h Tue Aug 27 16:11:02 2013 @@ -0,0 +1,197 @@ +/* forward declarations */ +typedef struct User User; + +typedef struct Cpuidreg Cpuidreg; +typedef struct Filter Filter; +typedef ulong Float; +typedef struct Label Label; +typedef struct Lock Lock; +typedef struct MACH MACH; +typedef struct Mach Mach; +typedef struct Mbank Mbank; +typedef struct Mconf Mconf; +typedef struct MMMU MMMU; +typedef u64int Mpl; +typedef struct Page Page; +typedef struct Pcidev Pcidev; +typedef struct Perf Perf; +typedef u64int PTE; +typedef struct Sys Sys; +typedef u64int uintmem; + +/* fully declared elsewhere */ +#pragma incomplete Page +#pragma incomplete Pcidev + +#pragma varargck type "P" uintmem +#pragma varargck type "τ" int + + +enum { + NPGSZ = 4, +}; + +struct Lock +{ + u32int* sbsem; /* addr of sync bus semaphore */ + uintptr pc; + Mpl sr; + + Mach *m; + User *p; + char isilock; +}; + +enum { + MAXBANK = 8, +}; + +struct Mbank { + uintptr base; + uintptr limit; +}; + +struct Mconf { + Lock; + Mbank bank[MAXBANK]; + int nbank; +}; + +/* + * MMU stuff in 
Mach. + */ +struct MMMU +{ + Page* pml4; /* pml4 for this processor */ + PTE* pmap; /* unused as of yet */ + + uint pgszlg2[NPGSZ]; /* per Mach or per Sys? */ + uintmem pgszmask[NPGSZ]; + uint pgsz[NPGSZ]; + int npgsz; + + uchar pml4kludge[128]; +// Page pml4kludge; /* NIX KLUDGE: we need a page */ +}; + +/* + * performance timers, all units in perfticks + */ +struct Perf +{ + u64int intrts; /* time of last interrupt */ + u64int inintr; /* time since last clock tick in interrupt handlers */ + u64int avg_inintr; /* avg time per clock tick in interrupt handlers */ + u64int inidle; /* time since last clock tick in idle loop */ + u64int avg_inidle; /* avg time per clock tick in idle loop */ + u64int last; /* value of perfticks() at last clock tick */ + u64int period; /* perfticks() per clock tick */ +}; + +struct MACH +{ + uintptr splpc; /* known to assembly as 8(RMACH) */ + uintptr stack; + uchar* vsvm; + void* gdt; + void* tss; + + Lock apictimerlock; + int apicno; + int online; + + Perf perf; + + u64int rdtsc; +}; + +struct Filter +{ + ulong count; /* count and old count kept separate */ + ulong oldcount; /* so interrput can read them */ + Float filter[3]; /* filter */ +}; + +struct Label +{ + uintptr sp; + uintptr pc; +}; + +struct Mach +{ + int machno; /* physical id of processor */ + MACH; /* locations known to assmbly */ + int lights; /* light lights, this processor */ + Filter idle; + + User* proc; /* current process on this processor */ + Label sched; /* scheduler wakeup */ + + User* intrp; /* process that was interrupted */ + + int lastintr; + int spuriousintr; + + MMMU; + + int cpumhz; + uvlong cpuhz; +}; + +struct Cpuidreg { + u32int ax; + u32int bx; + u32int cx; + u32int dx; +}; + +/* + * This is the low memory map, between 0x100000 and 0x110000. + * It is located there to allow fundamental datastructures to be + * created and used before knowing where free memory begins + * (e.g. there may be modules located after the kernel BSS end). 
+ * The layout is known in the bootstrap code in l32p.s. + * It is logically two parts: the per processor data structures + * for the bootstrap processor (stack, Mach, vsvm, and page tables), + * and the global information about the system (syspage, ptrpage). + * Some of the elements must be aligned on page boundaries, hence + * the unions. + */ +struct Sys { + uchar machstk[MACHSTKSZ]; + + PTE pml4[PTSZ/sizeof(PTE)]; /* */ + PTE pdp[PTSZ/sizeof(PTE)]; + PTE pd[PTSZ/sizeof(PTE)]; + PTE pt[PTSZ/sizeof(PTE)]; + + uchar vsvmpage[4*KiB]; + + union { + Mach mach; + uchar machpage[MACHSZ]; + }; + union { + struct { + long ticks; /* convert to u64int for tsc? */ + u64int epoch; /* tsc synchronization */ + }; + uchar syspage[4*KiB]; + }; + union { + Mach* machptr[MACHMAX]; + uchar ptrpage[4*KiB]; + }; + uchar pad[2][4096]; +}; + +extern Sys* sys; +extern Mach mach0; +extern Mconf mconf; +extern char nvrfile[128]; +extern register Mach* m; +extern register User* u; + +#define MACHP(n) sys->machptr[n] +#define Ticks sys->ticks --- /sys/src/fs/amd64/options.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/options.c Tue Aug 27 16:11:02 2013 @@ -0,0 +1,179 @@ +#include "u.h" +#include "../port/lib.h" +#include "dat.h" +#include "fns.h" +#include "adr.h" +#include "io.h" + +/* + * Where configuration info is left for the loaded programme. + * This will turn into a structure as more is done by the boot loader + * (e.g. why parse the .ini file twice?). + * There are 3584 bytes available at CONFADDR. 
+ */ +#define CONFADDR PTR2UINT(KADDR(0x0001200)) + +#define BOOTLINE ((char*)CONFADDR) +#define BOOTLINELEN 64 +#define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (4096-0x200-BOOTLINELEN) + +enum { + Maxconf = 64, +}; + +typedef struct C C; +struct C { + char *name; + char *val; +}; + +static C cfg[Maxconf]; +static int ncfg; +static char dbgflg[127]; + +static void +parseoptions(void) +{ + long i, n; + char *cp, *line[Maxconf]; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + n = getfields(cp, line, Maxconf, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + cfg[ncfg].name = line[i]; + cfg[ncfg].val = cp; + ncfg++; + } +} + +static void +cmdline(void) +{ + char *p, *f[32], **argv, buf[200]; + int argc, n, o; + + p = getconf("*cmdline"); + if(p == nil) + return; + snprint(buf, sizeof buf, "%s", p); + argc = tokenize(buf, f, nelem(f)); + argv = f; + + /* + * Process flags. + * Flags [A-Za-z] may be optionally followed by + * an integer level between 1 and 127 inclusive + * (no space between flag and level). + * '--' ends flag processing. 
+ */ + while(--argc > 0 && (*++argv)[0] == '-' && (*argv)[1] != '-'){ + while(o = *++argv[0]){ + if(!(o >= 'A' && o <= 'Z') && !(o >= 'a' && o <= 'z')) + continue; + n = strtol(argv[0]+1, &p, 0); + if(p == argv[0]+1 || n < 1 || n > 127) + n = 1; + argv[0] = p-1; + dbgflg[o] = n; + } + } +} + +static int typemap[] = { + Anone, + Amemory, + Areserved, + Aacpireclaim, + Aacpinvs, + Aunusable, + Adisable, +}; + +static void +e820(void) +{ + char *p, *s; + uvlong base, len, type; + + p = getconf("*e820"); + if(p == nil) + return; + for(s = p;;){ + if(*s == 0) + break; + type = strtoull(s, &s, 16); + if(*s != ' ') + break; + base = strtoull(s, &s, 16); + if(*s != ' ') + break; + len = strtoull(s, &s, 16) - base; + if(*s != ' ' && *s != 0 || len == 0) + break; + if(type >= nelem(typemap)) + continue; + adrmapinit(base, len, typemap[type], Mfree); + } +} + +void +options(void) +{ + parseoptions(); + e820(); + cmdline(); +} + + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < ncfg; i++) + if(cistrcmp(cfg[i].name, name) == 0) + return cfg[i].val; + return nil; +} + +int +pciconfig(char *class, int ctlrno, Pciconf *pci) +{ + char cc[32], *p; + int i; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + p = getconf(cc); + if(p == nil) + return 0; + + pci->type = ""; + snprint(pci->optbuf, sizeof pci->optbuf, "%s", p); + pci->nopt = tokenize(pci->optbuf, pci->opt, nelem(pci->opt)); + for(i = 0; i < pci->nopt; i++){ + p = pci->opt[i]; + if(cistrncmp(p, "type=", 5) == 0) + pci->type = p + 5; + else if(cistrncmp(p, "port=", 5) == 0) + pci->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + pci->irq = strtoul(p+4, &p, 0); +// else if(cistrncmp(p, "mem=", 4) == 0) +// pci->mem = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "tbdf=", 5) == 0) + pci->tbdf = strtotbdf(p+5, &p, 0); + } + return 1; +} --- /sys/src/fs/amd64/iasata.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/iasata.c Tue Aug 27 16:11:03 2013 @@ -0,0 +1,1689 @@ +/* + * intel/amd ahci sata 
controller + * copyright © 2007-12 coraid, inc. + */ + +#include "all.h" +#include "io.h" +#include +#include "ahci.h" + +#define dprint(...) if(debug) print(__VA_ARGS__); else USED(debug) +#define idprint(...) if(prid ) print(__VA_ARGS__); else USED(prid) +#define aprint(...) if(datapi) print(__VA_ARGS__); else USED(datapi); + +enum { + SDok = 0, + SDeio = -1, + SDretry = -2, + SDcheck = -3, +}; + +enum { + NCtlr = 4, + NCtlrdrv = 32, + NDrive = NCtlr*NCtlrdrv, + + Fahdrs = 4, + + Read = 0, + Write, + + Eesb = 1<<0, /* must have (Eesb & Emtype) == 0 */ + + /* pci space configuration */ + Pmap = 0x90, + Ppcs = 0x91, + + Nms = 256, + Mphywait = 2*1024/Nms - 1, + Midwait = 16*1024/Nms - 1, + Mcomrwait = 64*1024/Nms - 1, +}; + +enum { + Tesb, + Tsb600, + Tjmicron, + Tahci, + Tlast, +}; + +typedef struct Ctlrtype Ctlrtype; +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +struct Ctlrtype { + uint type; + uint maxdmaxfr; + uint flags; + char *name; +}; + +Ctlrtype cttab[Tlast] = { +[Tesb] Tesb, 8192, 0, "63xxesb", +[Tsb600] Tsb600, 256, 0, "sb600", +[Tjmicron] Tjmicron, 8192, 0, "jmicron", +[Tahci] Tahci, 8192, 0, "ahci", +}; + +enum { + Dnull = 0, + Dmissing = 1<<0, + Dnew = 1<<1, + Dready = 1<<2, + Derror = 1<<3, + Dreset = 1<<4, + Doffline = 1<<5, + Dportreset = 1<<6, + Dlast = 8, +}; + +static char *diskstates[Dlast] = { + "null", + "missing", + "new", + "ready", + "error", + "reset", + "offline", + "portreset", +}; + +enum { + DMautoneg, + DMsatai, + DMsataii, + DMsataiii, + DMlast, +}; + +static char *modes[DMlast] = { + "auto", + "satai", + "sataii", + "sataiii", +}; + +struct Drive { + Lock; + + Ctlr *ctlr; + char name[10]; + Aport *port; + Aportm portm; + Aportc portc; /* redundant ptr to port and portm. 
*/ + + uchar drivechange; + uchar state; + + uvlong sectors; + uint secsize; + ulong totick; + ulong lastseen; + uint wait; + uchar mode; + uchar active; + + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + uvlong wwn; + + ushort info[0x200]; + + /* + * ahci allows non-sequential ports. + * to avoid this hassle, we let + * driveno ctlr*NCtlrdrv + unit + * portno nth available port + */ + uint driveno; + uint portno; + + Filter rate[2]; + int fflag; + int init; + ulong reads, writes; +}; + +struct Ctlr { + Lock; + + Ctlrtype *type; + Pcidev *pci; + + uchar *mmio; + u32int *lmmio; + Ahba *hba; + + Drive rawdrive[NCtlrdrv]; + Drive* drive[NCtlrdrv]; + int ndrive; + uint pi; +}; + +static Ctlr iactlr[NCtlr]; +static int niactlr; + +static Drive *iadrive[NDrive]; +static int niadrive; + +static int debug; +static int prid = 1; +static int datapi; + +static char stab[] = { +[0] 'i', 'm', +[8] 't', 'c', 'p', 'e', +[16] 'N', 'I', 'W', 'B', 'D', 'C', 'H', 'S', 'T', 'F', 'X' +}; + +static void +serrstr(u32int r, char *s, char *e) +{ + int i; + + e -= 3; + for(i = 0; i < nelem(stab) && s < e; i++) + if(r & (1<task, p->cmd, p->ci, p->isr); +} + +void +xsleep(Rendez *r, int (*f)(void*), void *a) +{ + if(u) + sleep(r, f, a); + else + for(; !f(a);) + delay(1); +} + +void +xtsleep(Rendez *r, int (*f)(void*), void *a, int ms) +{ + int i; + + if(u) + tsleep(r, f, a, ms); + else + for(i = 0; i < ms; i++){ + if(f(a)) + break; + delay(1); + } +} + +static void +esleep(int ms) +{ + xtsleep(&u->tsleep, no, 0, ms); +} + +typedef struct { + Aport *p; + int i; +} Asleep; + +static int +ahciclear(void *v) +{ + Asleep *s; + + s = v; + return (s->p->ci & s->i) == 0; +} + +static void +aesleep(Aportm *m, Asleep *a, int ms) +{ + xtsleep(m, ahciclear, a, ms); +} + +static int +ahciwait(Aportc *c, int ms) +{ + Aport *p; + Asleep as; + + p = c->p; + p->ci = 1; + as.p = p; + as.i = 1; + aesleep(c->m, &as, ms); + if((p->task & 1) == 0 && p->ci == 0) + return 0; + dreg("ahciwait 
fail/timeout ", c->p); + return -1; +} + +static void +mkalist(Aportm *m, uint flags, uchar *data, int len) +{ + Actab *t; + Alist *l; + Aprdt *p; + + t = m->ctab; + l = m->list; + l->flags = flags | 0x5; + l->len = 0; + l->ctab = Pciwaddrl(t); + l->ctabhi = Pciwaddrh(t); + if(data){ + l->flags |= 1<<16; + p = &t->prdt; + p->dba = Pciwaddrl(data); + p->dbahi = Pciwaddrh(data); + p->count = 1<<31 | len - 2 | 1; + } +} + +static int +nop(Aportc *pc) +{ + uchar *c; + + if((pc->m->feat & Dnop) == 0) + return -1; + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static int +setfeatures(Aportc *pc, uchar f, uint w) +{ + uchar *c; + + c = pc->m->ctab->cfis; + featfis(pc->m, c, f); + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, w); +} + +static int +settxmode(Aportc *pc, uchar f) +{ + uchar *c; + + c = pc->m->ctab->cfis; + if(txmodefis(pc->m, c, f) == -1) + return 0; + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static void +asleep(int ms) +{ + esleep(ms); +} + +static int +ahciportreset(Aportc *c, uint mode) +{ + int i; + u32int *cmd; + Aport *p; + + p = c->p; + cmd = &p->cmd; + *cmd &= ~(Afre|Ast); + for(i = 0; i < 500; i += 25){ + if((*cmd & Acr) == 0) + break; + asleep(25); + } + p->sctl = 3*Aipm | 0*Aspd | Adet; + delay(1); + p->sctl = 3*Aipm | mode*Aspd; + return 0; +} + +static int +ahciidentify0(Aportc *pc, void *id) +{ + uchar *c; + Actab *t; + + t = pc->m->ctab; + c = t->cfis; + memset(id, 0, 0x200); + identifyfis(pc->m, c); + mkalist(pc->m, 0, id, 0x200); + return ahciwait(pc, 3*1000); +} + +static vlong +ahciidentify(Aportc *pc, ushort *id, uint *ss, char *d) +{ + int i, n; + vlong s; + Aportm *m; + + m = pc->m; + for(i = 0;; i++){ + if(i > 5 || ahciidentify0(pc, id) != 0) + return -1; + n = idpuis(id); + if(n & Pspinup && setfeatures(pc, 7, 20*1000) == -1) + print("%s: puis spinup fail\n", d); + if(n & Pidready) + break; + print("%s: puis waiting\n", d); + } + s = 
idfeat(m, id); + *ss = idss(m, id); + if(s == -1 || (m->feat&Dlba) == 0){ + if((m->feat&Dlba) == 0) + dprint("%s: no lba support\n", d); + return -1; + } + return s; +} + +static int +ahciquiet(Aport *a) +{ + int i; + u32int *p; + + p = &a->cmd; + *p &= ~Ast; + for(i = 0; i < 500; i += 50){ + if((*p & Acr) == 0) + goto stop; + asleep(50); + } + return -1; +stop: + if((a->task & (ASdrq|ASbsy)) == 0){ + *p |= Ast; + return 0; + } + + *p |= Aclo; + for(i = 0; i < 500; i += 50){ + if((*p & Aclo) == 0) + goto stop1; + asleep(50); + } + return -1; +stop1: + /* extra check */ + dprint("ahci: clo clear %ux\n", a->task); + if(a->task & ASbsy) + return -1; + *p |= Afre | Ast; + return 0; +} + +static int +ahcicomreset(Aportc *pc) +{ + uchar *c; + + dreg("comreset ", pc->p); + if(ahciquiet(pc->p) == -1){ + dprint("ahci: ahciquiet failed\n"); + return -1; + } + dreg("comreset ", pc->p); + + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 1); + mkalist(pc->m, Lclear | Lreset, 0, 0); + if(ahciwait(pc, 500) == -1){ + dprint("ahci: comreset1 failed\n"); + return -1; + } + microdelay(250); + dreg("comreset ", pc->p); + + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + if(ahciwait(pc, 150) == -1){ + dprint("ahci: comreset2 failed\n"); + return -1; + } + dreg("comreset ", pc->p); + return 0; +} + +static int +ahciidle(Aport *port) +{ + int i, r; + u32int *p; + + p = &port->cmd; + if((*p & Arun) == 0) + return 0; + *p &= ~Ast; + r = 0; + for(i = 0; i < 500; i += 25){ + if((*p & Acr) == 0) + goto stop; + asleep(25); + } + r = -1; +stop: + if((*p & Afre) == 0) + return r; + *p &= ~Afre; + for(i = 0; i < 500; i += 25){ + if((*p & Afre) == 0) + return 0; + asleep(25); + } + return -1; +} + +/* + * §6.2.2.1 first part; comreset handled by reset disk. + * - remainder is handled by configdisk. + * - ahcirecover is a quick recovery from a failed command. 
+ */ +static int +ahciswreset(Aportc *pc) +{ + int i; + + i = ahciidle(pc->p); + pc->p->cmd |= Afre; + if(i == -1) + return -1; + if(pc->p->task & (ASdrq|ASbsy)) + return -1; + return 0; +} + +static int +ahcirecover(Aportc *pc) +{ + ahciswreset(pc); + pc->p->cmd |= Ast; + if(settxmode(pc, pc->m->udma) == -1) + return -1; + return 0; +} + +static void* +mallocalign(usize size, usize align, long, usize) +{ + return ialloc(size, align); +} + +static void +setupfis(Afis *f) +{ + f->base = mallocalign(0x100, 0x100, 0, 0); + f->d = f->base + 0; + f->p = f->base + 0x20; + f->r = f->base + 0x40; + f->u = f->base + 0x60; + f->devicebits = (u32int*)(f->base + 0x58); +} + +static void +ahciwakeup(Aportc *c, uint mode) +{ + ushort s; + + s = c->p->sstatus; + if((s & Isleepy) == 0) + return; + if((s & Smask) != Spresent){ + print("ahci: slumbering drive missing %.3ux\n", s); + return; + } + ahciportreset(c, mode); +// print("ahci: wake %.3ux -> %.3lux\n", s, c->p->sstatus); +} + +static int +ahciconfigdrive(Ahba *h, Aportc *c, int mode) +{ + Aportm *m; + Aport *p; + + p = c->p; + m = c->m; + + if(m->list == 0){ + setupfis(&m->fis); + m->list = mallocalign(sizeof *m->list, 1024, 0, 0); + m->ctab = mallocalign(sizeof *m->ctab, 128, 0, 0); + } + + p->list = Pciwaddrl(m->list); + p->listhi = Pciwaddrh(m->list); + p->fis = Pciwaddrl(m->fis.base); + p->fishi = Pciwaddrh(m->fis.base); + + p->cmd |= Afre; + + if((p->sstatus & Sbist) == 0 && (p->cmd & Apwr) != Apwr) + if((p->sstatus & Sphylink) == 0 && h->cap & Hss){ + dprint("ahci: spin up ... [%.3ux]\n", p->sstatus); + p->cmd |= Apwr; + for(int i = 0; i < 1400; i += 50){ + if(p->sstatus & (Sphylink | Sbist)) + break; + asleep(50); + } + } + + p->serror = SerrAll; + + if((p->sstatus & SSmask) == (Isleepy | Spresent)) + ahciwakeup(c, mode); + /* disable power managment sequence from book. 
*/ + p->sctl = 3*Aipm | mode*Aspd | 0*Adet; + p->cmd &= ~Aalpe; + + p->cmd |= Ast; + p->ie = IEM; + + return 0; +} + +static void +setstate(Drive *d, int state) +{ + ilock(d); + d->state = state; + iunlock(d); +} + +static void +ahcienable(Ahba *h) +{ + h->ghc |= Hie; +} + +static void +ahcidisable(Ahba *h) +{ + h->ghc &= ~Hie; +} + +static int +countbits(u32int u) +{ + int i, n; + + n = 0; + for(i = 0; i < 32; i++) + if(u & (1<hba = (Ahba*)c->mmio; + u = h->cap; + + if((u & Ham) == 0) + h->ghc |= Hae; + + print("ahci hba sss %d; ncs %d; coal %d; mports %d; led %d; clo %d; ems %d;\n", + (u>>27) & 1, (u>>8) & 0x1f, (u>>7) & 1, u & 0x1f, (u>>25) & 1, + (u>>24) & 1, (u>>6) & 1); + return countbits(h->pi); +} + +static int +ahcihbareset(Ahba *h) +{ + int wait; + + h->ghc |= Hhr; + for(wait = 0; wait < 1000; wait += 100){ + if(h->ghc == 0) + return 0; + delay(100); + } + return -1; +} + +/* under development */ +static int +ahcibioshandoff(Ahba *h) +{ + int i, wait; + + if((h->cap2 & Boh) == 0) + return 0; + if((h->bios & Bos) == 0) + return 0; + + print("ahcibioshandoff: claim\n"); + h->bios |= Oos; + + wait = 25; + for(i = 0; i < wait; i++){ + delay(1); + if((h->bios & Bos) == 0) + break; + if(i < 25 && h->bios & Bb){ + print("ahcibioshandoff: busy\n"); + wait = 2000; + } + } + if(i == wait){ + print("ahcibioshandoff: timeout %.1ux\n", h->bios); + h->bios = Oos; + } + return 0; +} + +static char* +dstate(uint s) +{ + int i; + + for(i = 0; s; i++) + s >>= 1; + return diskstates[i]; +} + +static char* +tnam(Ctlr *c) +{ + return c->type->name; +} + +static char* +dnam(Drive *d) +{ + char *s; + + s = d->name; + if(d->name[0] == 0) + snprint(d->name, sizeof d->name, "a%d", d->driveno); + return s; +} + +static int +identify(Drive *d) +{ + uchar oserial[21]; + ushort *id; + vlong osectors, s; + + id = d->info; + s = ahciidentify(&d->portc, id, &d->secsize, dnam(d)); + if(s == -1){ + d->state = Derror; + return -1; + } + osectors = d->sectors; + memmove(oserial, d->serial, 
sizeof d->serial); + + d->sectors = s; + + idmove(d->serial, id+10, 20); + idmove(d->firmware, id+23, 8); + idmove(d->model, id+27, 40); + d->wwn = idwwn(d->portc.m, id); + + if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)) + d->drivechange = 1; + + return 0; +} + +static void +clearci(Aport *p) +{ + if(p->cmd & Ast){ + p->cmd &= ~Ast; + p->cmd |= Ast; + } +} + +static int +fmtσ(Fmt *f) +{ + Drive *d; + char buf[8]; + + d = va_arg(f->args, Drive*); + if(d == nil) + snprint(buf, sizeof buf, "s[nil]"); + else + snprint(buf, sizeof buf, "a%d", d->driveno); + return fmtstrcpy(f, buf); +} + +static int +intel(Ctlr *c) +{ + return c->pci->vid == 0x8086; +} + +static int +ignoreahdrs(Drive *d) +{ + return d->portm.feat & Datapi && d->ctlr->type->type == Tsb600; +} + +static void +updatedrive(Drive *d) +{ + u32int f, cause, serr, s0, pr, ewake; + Aport *p; + static u32int last; + + pr = 1; + ewake = 0; + f = 0; + p = d->port; + cause = p->isr; + if(d->ctlr->type->type == Tjmicron) + cause &= ~Aifs; + serr = p->serror; + p->isr = cause; + + if(p->ci == 0){ + f |= Fdone; + pr = 0; + }else if(cause & Adps) + pr = 0; + if(cause & Ifatal){ + ewake = 1; + dprint("%s: fatal\n", dnam(d)); + } + if(cause & Adhrs){ + if(p->task & 33){ + if(ignoreahdrs(d) && serr & ErrE) + f |= Fahdrs; + dprint("%s: Adhrs cause %ux serr %ux task %ux\n", + dnam(d), cause, serr, p->task); + f |= Ferror; + ewake = 1; + } + pr = 0; + } + if(p->task & 1 && last != cause) + dprint("%s: err ca %ux serr %ux task %ux sstat %.3ux\n", + dnam(d), cause, serr, p->task, p->sstatus); + if(pr) + dprint("%s: upd %ux ta %ux\n", dnam(d), cause, p->task); + + if(cause & (Aprcs|Aifs)){ + s0 = d->state; + switch(p->sstatus & Smask){ + case Smissing: + d->state = Dmissing; + break; + case Spresent: + if((p->sstatus & Imask) == Islumber) + d->state = Dnew; + else + d->state = Derror; + break; + case Sphylink: + /* power mgnt crap for suprise removal */ + p->ie |= Aprcs|Apcs; /* is this required? 
*/
+			d->state = Dreset;
+			break;
+		case Sbist:
+			d->state = Doffline;
+			break;
+		}
+		dprint("%s: %s → %s [Apcrs] %.3ux\n", dnam(d), dstate(s0),
+			dstate(d->state), p->sstatus);
+		if(s0 == Dready && d->state != Dready)
+			idprint("%s: pulled\n", dnam(d));
+		if(d->state != Dready)
+			f |= Ferror;
+		if(d->state != Dready || p->ci)
+			ewake = 1;
+	}
+	p->serror = serr;
+	if(ewake)
+		clearci(p);
+	if(f){
+		d->portm.flag = f;
+		wakeup(&d->portm);
+	}
+	last = cause;
+}
+
+/*
+ * Derive the drive state from s, the detection field of the port
+ * status (sstatus & Smask).  Caller holds the drive lock.
+ */
+static void
+pstatus(Drive *d, u32int s)
+{
+	/*
+	 * bogus code because the first interrupt is currently dropped.
+	 * likely my fault. serror is maybe cleared at the wrong time.
+	 */
+	if(s)
+		d->lastseen = Ticks;
+	switch(s){
+	default:
+		print("%s: pstatus: bad status %.3ux\n", dnam(d), s);
+		/* fall through: an unknown status is treated as missing */
+	case Smissing:
+		d->state = Dmissing;
+		break;
+	case Spresent:
+		break;
+	case Sphylink:
+		d->wait = 0;
+		d->state = Dnew;
+		break;
+	case Sbist:
+		d->state = Doffline;
+		break;
+	}
+}
+
+/* program the port, then set the drive state from its status; -1 on failure */
+static int
+configdrive(Drive *d)
+{
+	if(ahciconfigdrive(d->ctlr->hba, &d->portc, d->mode) == -1)
+		return -1;
+	ilock(d);
+	pstatus(d, d->port->sstatus & Smask);
+	iunlock(d);
+	return 0;
+}
+
+/*
+ * Abort whatever is outstanding on the drive and try to bring the
+ * port back.  Sleepers on portm are woken with Ferror set.  If the
+ * phy link is down, or the software reset fails, the drive is left
+ * in Dportreset; otherwise it is reconfigured via configdrive.
+ */
+static void
+resetdisk(Drive *d)
+{
+	uint state, det, stat;
+	Aport *p;
+
+	p = d->port;
+	det = p->sctl & 7;
+	stat = p->sstatus & Smask;
+	state = (p->cmd>>28) & 0xf;
+	dprint("%s: resetdisk: icc %ux det %.3ux sdet %.3ux\n", dnam(d), state, det, stat);
+
+	ilock(d);
+	state = d->state;
+	/*
+	 * This used to be guarded by
+	 *	d->state != Dready || d->state != Dnew
+	 * which is true for every state, so the flag was always set.
+	 * The assignment is now unconditional.
+	 * NOTE(review): if `&&' was intended, confirm before narrowing.
+	 */
+	d->portm.flag |= Ferror;
+	clearci(p);			/* satisfy sleep condition. */
+	wakeup(&d->portm);
+	d->state = Derror;
+	iunlock(d);
+
+	if(stat != Sphylink){
+		setstate(d, Dportreset);
+		return;
+	}
+
+	qlock(&d->portm);
+	if(p->cmd&Ast && ahciswreset(&d->portc) == -1)
+		setstate(d, Dportreset);	/* get a bigger stick. */
+	else{
+		setstate(d, Dmissing);
+		configdrive(d);
+	}
+	dprint("%s: resetdisk: %s → %s\n", dnam(d), dstate(state), dstate(d->state));
+	qunlock(&d->portm);
+}
+
+/*
+ * Bring a freshly detected drive to Dready: identify it, set the
+ * transfer mode and, when Dpower is advertised, issue set-features
+ * 0x85.  Returns 0 on success; on failure the drive is set to Dnull
+ * and -1 is returned.  Runs with portm's qlock held.
+ */
+static int
+newdrive(Drive *d)
+{
+	char *s;
+	Aportc *c;
+	Aportm *m;
+
+	c = &d->portc;
+	m = &d->portm;
+
+	qlock(c->m);
+	setfissig(m, c->p->sig);
+	if(identify(d) == -1){
+		dprint("%s: identify failure\n", dnam(d));
+		goto lose;
+	}
+	if(settxmode(c, m->udma) == -1){
+		dprint("%s: can't set udma mode\n", dnam(d));
+		goto lose;
+	}
+	if(m->feat & Dpower && setfeatures(c, 0x85, 3*1000) == -1){
+		/* the drive refused: forget the feature and recover the port */
+		m->feat &= ~Dpower;
+		if(ahcirecover(c) == -1)
+			goto lose;
+	}
+
+	setstate(d, Dready);
+
+	qunlock(c->m);
+
+	s = "";
+	if(m->feat & Dllba)
+		s = "L";
+	idprint("%s: %sLBA %,lld sectors\n", dnam(d), s, d->sectors);
+	idprint(" %s %s %s %s\n", d->model, d->firmware, d->serial,
+		d->drivechange? "[newdrive]": "");
+	return 0;
+
+lose:
+	idprint("%s: can't be initialized\n", dnam(d));
+	setstate(d, Dnull);
+	qunlock(c->m);
+	return -1;
+}
+
+/*
+ * Flag a non-atapi drive for reset when a command is active and its
+ * deadline (totick, set by io) has passed.
+ */
+static void
+hangck(Drive *d)
+{
+	if((d->portm.feat & Datapi) == 0 && d->active &&
+	    d->totick != 0 && (long)(Ticks - d->totick) > 0){
+		print("%s: drive hung; resetting [%ux] ci %ux\n",
+			dnam(d), d->port->task, d->port->ci);
+		d->state = Dreset;
+	}
+}
+
+/* last sstatus seen by checkdrive, indexed by its second argument */
+static ushort olds[NCtlr*NCtlrdrv];
+
+/* reset the port under portm's qlock; 0 on success, -1 on failure */
+static int
+doportreset(Drive *d)
+{
+	int i;
+
+	i = -1;
+	qlock(&d->portm);
+	if(ahciportreset(&d->portc, d->mode) == -1)
+		dprint("ahci: ahciportreset fails\n");
+	else
+		i = 0;
+	qunlock(&d->portm);
+	dprint("ahci: portreset → %s [task %.4ux ss %.3ux]\n",
+		dstate(d->state), d->port->task, d->port->sstatus);
+	return i;
+}
+
+/* drive must be locked */
+static void
+statechange(Drive *d)
+{
+	/* these states restart the poll counter used by checkdrive */
+	switch(d->state){
+	case Dnull:
+	case Doffline:
+	case Dready:
+		d->wait = 0;
+	}
+}
+
+/* the 4-bit field of the hba capabilities that starts at bit Hiss */
+static uint
+maxmode(Ctlr *c)
+{
+	return (c->hba->cap & 0xf*Hiss)/Hiss;
+}
+
+static void
+checkdrive(Drive *d, int i)
+{
+	ushort s, sig;
+
+	ilock(d);
+	s = d->port->sstatus;
+	if(s)
+		d->lastseen
= Ticks; + if(s != olds[i]){ + dprint("%s: status: %.3ux -> %.3ux: %s\n", + dnam(d), olds[i], s, dstate(d->state)); + olds[i] = s; + d->wait = 0; + } + hangck(d); + switch(d->state){ + case Dnull: + case Dready: + break; + case Dmissing: + case Dnew: + switch(s & (Iactive|Smask)){ + case Spresent: + ahciwakeup(&d->portc, d->mode); + case Smissing: + break; + default: + dprint("%s: unknown status %.3ux\n", dnam(d), s); + /* fall through */ + case Iactive: /* active, no device */ + if(++d->wait&Mphywait) + break; +reset: + if(d->mode == 0) + d->mode = maxmode(d->ctlr); + else + d->mode--; + if(d->mode == DMautoneg){ + d->state = Dportreset; + goto portreset; + } + dprint("%s: reset; new mode %s\n", dnam(d), + modes[d->mode]); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Iactive | Sphylink: + if((++d->wait&Midwait) == 0){ + dprint("%s: slow reset %.3ux task=%ux; %d\n", + dnam(d), s, d->port->task, d->wait); + goto reset; + } + s = (uchar)d->port->task; + sig = d->port->sig >> 16; + if(s == 0x7f || s&ASbsy || + (sig != 0xeb14 && (s & ASdrdy) == 0)) + break; + iunlock(d); + newdrive(d); + ilock(d); + break; + } + break; + case Doffline: + if(d->wait++ & Mcomrwait) + break; + /* fallthrough */ + case Derror: + case Dreset: + dprint("%s: reset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Dportreset: +portreset: + if(d->wait++ & 0xff && (s & Iactive) == 0) + break; + dprint("%s: portreset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + d->portm.flag |= Ferror; + clearci(d->port); + wakeup(&d->portm); + if((s & Smask) == 0){ + d->state = Dmissing; + break; + } + iunlock(d); + doportreset(d); + ilock(d); + break; + } + statechange(d); + iunlock(d); +} + +Rendez kprocr; + +static void +satakproc(void) +{ + int i; + + for(;;){ + xtsleep(&kprocr, no, 0, Nms); + for(i = 0; i < niadrive; i++) + checkdrive(iadrive[i], i); + } +} + +static void 
+iainterrupt(Ureg*, void *a) +{ + int i; + u32int cause, m; + Ctlr *c; + Drive *d; + + c = a; + ilock(c); + cause = c->hba->isr; + for(i = 0; cause; i++){ + m = 1 << i; + if((cause & m) == 0) + continue; + cause &= ~m; + d = c->rawdrive + i; + ilock(d); + if(d->port->isr && c->pi & m) + updatedrive(d); + c->hba->isr = m; + iunlock(d); + } + iunlock(c); +} + +/* returns locked list! */ +static Alist* +ahcibuild(Aportm *m, int rw, void *data, uint n, vlong lba) +{ + uchar *c; + uint flags; + Alist *l; + + l = m->list; + c = m->ctab->cfis; + rwfis(m, c, rw, n, lba); + flags = Lpref; + if(rw) + flags |= Lwrite; + mkalist(m, flags, data, 512*n); + return l; +} + +static int +waitready(Drive *d) +{ + ulong s, i, δ; + + for(i = 0; i < 15000; i += 250){ + if(d->state == Dreset || d->state == Dportreset || + d->state == Dnew) + return 1; + δ = Ticks - d->lastseen; + if(d->state == Dnull || δ > 10*1000) + return -1; + ilock(d); + s = d->port->sstatus; + iunlock(d); + if((s & Imask) == 0 && δ > 1500) + return -1; + if(d->state == Dready && (s & Smask) == Sphylink) + return 0; + esleep(250); + } + print("%s: not responding; offline\n", dnam(d)); + ilock(d); + d->state = Doffline; + iunlock(d); + return -1; +} + +static int +lockready(Drive *d) +{ + int i; + + qlock(&d->portm); + while ((i = waitready(d)) == 1) { + qunlock(&d->portm); + esleep(1); + qlock(&d->portm); + } + return i; +} + +static int +io(Drive *d, uint proto, int to, int) +{ + uint task, flag, rv; + Aport *p; + Asleep as; + + switch(waitready(d)){ + case -1: + return SDeio; + case 1: + return SDretry; + } + + ilock(d); + d->portm.flag = 0; + iunlock(d); + p = d->port; + p->ci = 1; + + as.p = p; + as.i = 1; + d->totick = 0; + if(to > 0) + d->totick = Ticks + MS2TK(to) | 1; /* fix fencepost */ + d->active++; + + xsleep(&d->portm, ahciclear, &as); + + d->active--; + ilock(d); + flag = d->portm.flag; + task = d->port->task; + iunlock(d); + + rv = SDok; + if(proto & Ppkt){ + rv = task >> 8 + 4 & 0xf; + flag &= 
~Fahdrs; + flag |= Fdone; + }else if(task & (Efatal<<8) || task & (ASbsy|ASdrq) && d->state == Dready){ + d->port->ci = 0; + ahcirecover(&d->portc); + task = d->port->task; + flag &= ~Fdone; /* either an error or do-over */ + } + if(flag == 0){ + print("%s: retry\n", dnam(d)); + return SDretry; + } + if(flag & (Fahdrs | Ferror)){ + if((task & Eidnf) == 0) + print("%s: i/o error %ux\n", dnam(d), task); + return SDcheck; + } + return rv; +} + +static int +rw(Drive *d, int rw, uchar *a, ulong len, uvlong lba) +{ + int n, try, status, max, count; + uchar *data; + Ctlr *c; + + count = len / d->secsize; + c = d->ctlr; + if(d->portm.feat & Datapi){ + print("%s: no atapi support\n", dnam(d)); + return -1; + } + + max = 128; + if(d->portm.feat & Dllba){ + max = 8192; /* ahci maximum */ + if(c->type->type == Tsb600) + max = 255; /* errata */ + } + data = a; + for(try = 0; try < 10; esleep(50)){ + n = count; + if(n > max) + n = max; + qlock(&d->portm); + ahcibuild(&d->portm, rw, data, n, lba); + status = io(d, Pdma, 5000, 0); + qunlock(&d->portm); + switch(status){ + case SDeio: + return -1; + case SDretry: + try++; + continue; + } + try = 0; + count -= n; + lba += n; + data += n * d->secsize; + if(count == 0) + return data - (uchar*)a; + } + print("%s: bad disk\n", dnam(d)); + return -1; +} + +/* configure drives 0-5 as ahci sata (c.f. 
errata) */ +static int +iaahcimode(Pcidev *p) +{ + uint u; + + u = pcicfgr16(p, 0x92); + dprint("ahci: iaahcimode %.2ux %.4ux\n", pcicfgr8(p, 0x91), u); + pcicfgw16(p, 0x92, u | 0xf); /* ports 0-15 (sic) */ + return 0; +} + +enum{ + Ghc = 0x04/4, /* global host control */ + Pi = 0x0c/4, /* ports implemented */ + Cmddec = 1<<15, /* enable command block decode */ + + /* Ghc bits */ + Ahcien = 1<<31, /* ahci enable */ +}; + +static void +iasetupahci(Ctlr *c) +{ + pcicfgw16(c->pci, 0x40, pcicfgr16(c->pci, 0x40) & ~Cmddec); + pcicfgw16(c->pci, 0x42, pcicfgr16(c->pci, 0x42) & ~Cmddec); + + c->lmmio[Ghc] |= Ahcien; + c->lmmio[Pi] = (1 << 6) - 1; /* 5 ports (supposedly ro pi reg) */ + + /* enable ahci mode; from ich9 datasheet */ + pcicfgw16(c->pci, 0x90, 1<<6 | 1<<5); +} + +static void +sbsetupahci(Pcidev *p) +{ + print("sbsetupahci: tweaking %.4ux ccru %.2ux ccrp %.2ux\n", + p->did, p->ccru, p->ccrp); + pcicfgw8(p, 0x40, pcicfgr8(p, 0x40) | 1); + pcicfgw8(p, PciCCRu, 6); + pcicfgw8(p, PciCCRp, 1); + p->ccru = 6; + p->ccrp = 1; +} + +static ushort itab[] = { + 0xfffc, 0x2680, Tesb, + 0xfffb, 0x27c1, Tahci, /* 82801g[bh]m */ + 0xffff, 0x2821, Tahci, /* 82801h[roh] */ + 0xfffe, 0x2824, Tahci, /* 82801h[b] */ + 0xfeff, 0x2829, Tahci, /* ich8 */ + 0xfffe, 0x2922, Tahci, /* ich9 */ + 0xffff, 0x3a02, Tahci, /* 82801jd/do */ + 0xfefe, 0x3a22, Tahci, /* ich10, pch */ + 0xfff7, 0x3b28, Tahci, /* pchm */ + 0xfffe, 0x3b22, Tahci, /* pch */ +}; + +static int +didtype(Pcidev *p) +{ + int type, i; + + type = Tahci; + switch(p->vid){ + default: + return -1; + case 0x8086: + for(i = 0; i < nelem(itab); i += 3) + if((p->did & itab[i]) == itab[i+1]) + return itab[i+2]; + break; + case 0x1002: + if(p->ccru == 1 || p->ccrp != 1) + if(p->did == 0x4380 || p->did == 0x4390) + sbsetupahci(p); + type = Tsb600; + break; + case 0x1106: + /* + * unconfirmed report that the programming + * interface is set incorrectly. 
+		 */
+		if(p->did == 0x3349)
+			return Tahci;
+		break;
+	case 0x10de:
+	case 0x1039:
+	case 0x1b4b:
+	case 0x11ab:
+		break;
+	case 0x197b:
+	case 0x10b9:
+		type = Tjmicron;
+		break;
+	}
+	if(p->ccrb == Pcibcstore && (uchar)p->ccru == 6 && p->ccrp == 1)
+		return type;
+	return -1;
+}
+
+/*
+ * Find every supported controller on the pci bus, map its registers,
+ * set up one Drive per implemented port, enable interrupts and start
+ * the polling process.  At most NCtlr controllers are accepted.
+ */
+static void
+iapnp(void)
+{
+	int i, n, nunit, type;
+	uintmem io;
+	Ctlr *c;
+	Pcidev *p;
+	Drive *d;
+
+	memset(olds, 0xff, sizeof olds);
+	p = nil;
+loop:
+	while((p = pcimatch(p, 0, 0)) != nil){
+		if((type = didtype(p)) == -1)
+			continue;
+		if(p->mem[Abar].bar == 0)
+			continue;
+		if(niactlr == NCtlr){
+			print("iapnp: %s: too many controllers\n", cttab[type].name);
+			break;
+		}
+		c = iactlr + niactlr;
+		memset(c, 0, sizeof *c);
+		io = p->mem[Abar].bar & ~0xfull;
+		c->mmio = vmap(io, p->mem[Abar].size);
+		if(c->mmio == nil){
+			/*
+			 * c->type is still nil here (c was just zeroed),
+			 * so tnam(c) must not be used; take the name
+			 * from the table directly.
+			 */
+			print("%s: address %#p in use did %.4ux\n",
+				cttab[type].name, io, p->did);
+			continue;
+		}
+		c->lmmio = (u32int*)c->mmio;
+		c->pci = p;
+		c->type = cttab + type;
+		pcisetbme(c->pci);
+
+		if(intel(c) && p->did != 0x2681)
+			iasetupahci(c);
+		ahcibioshandoff((Ahba*)c->mmio);
+//		ahcihbareset((Ahba*)c->mmio);
+		nunit = ahciconf(c);
+		c->pi = c->hba->pi;
+		if(0 && p->vid == 0x1002 && p->did == 0x4391){
+			c->pi = 0x3f;		/* noah's opteron */
+			nunit = 6;
+		}
+		if(intel(c) && iaahcimode(p) == -1 || nunit < 1){
+			vunmap(c->mmio, p->mem[Abar].size);
+			continue;
+		}
+		c->ndrive = nunit;
+
+		i = (c->hba->cap >> 21) & 1;
+		print("%s: sata-%s with %d ports\n",
+			tnam(c), "I\0II" + i*2, nunit);
+
+		/* map the drives -- they don't all need to be enabled. */
+		memset(c->rawdrive, 0, sizeof c->rawdrive);
+		n = 0;
+		for(i = 0; i < NCtlrdrv; i++){
+			d = c->rawdrive + i;
+			d->portno = i;
+			d->driveno = -1;
+			d->sectors = 0;
+			d->serial[0] = ' ';
+			d->ctlr = c;
+			/*
+			 * NOTE(review): the text between `1<<' and `name'
+			 * was lost in extraction; the skip of ports not
+			 * implemented is reconstructed — confirm.
+			 */
+			if((c->pi & 1<<i) == 0)
+				continue;
+			snprint(d->name, sizeof d->name, "a%d", niadrive + n);
+			d->port = (Aport*)(c->mmio + 0x80*i + 0x100);
+			d->portc.p = d->port;
+			d->portc.m = &d->portm;
+			d->driveno = n++;
+			c->drive[d->driveno] = d;
+			iadrive[niadrive + d->driveno] = d;
+		}
+		for(i = 0; i < n; i++)
+			if(ahciidle(c->drive[i]->port) == -1){
+				print("%s: port %d wedged; abort\n",
+					tnam(c), i);
+				goto loop;
+			}
+		for(i = 0; i < n; i++){
+			c->drive[i]->mode = DMautoneg;
+			configdrive(c->drive[i]);
+		}
+
+		intrenable(p->intl, iainterrupt, c, p->tbdf, "iasata");
+		ahcienable(c->hba);
+
+		// do we want to do this here?
+		/*
+		 * satakproc calls checkdrive with the index into
+		 * iadrive[]; use the same index here so a second
+		 * controller does not overwrite the olds[] slots
+		 * of the first.
+		 */
+		for(i = 0; i < n; i++)
+			checkdrive(c->drive[i], niadrive + i);
+
+		niadrive += nunit;
+		niactlr++;
+	}
+	userinit(satakproc, 0, "iasata");
+}
+
+/* print the rate filters and counters of each drive of c that has done i/o */
+static void
+statc(Ctlr *c)
+{
+	Drive *d;
+	int j;
+
+	for(j = 0; j < c->ndrive; j++){
+		d = c->drive[j];
+		if(d->fflag == 0)
+			continue;
+		print("%s:\n", dnam(d));
+		print(" r\t%W\n", d->rate+Read);
+		print(" w\t%W\n", d->rate+Write);
+		print(" r %uld w %uld\n", d->reads, d->writes);
+	}
+}
+
+/* console command: statistics for every controller */
+static void
+cmd_stat(int, char*[])
+{
+	int i;
+
+	for(i = 0; i < niactlr; i++)
+		statc(iactlr+i);
+}
+
+/*
+ * Map a Device to its Drive.  wren.ctrl selects the first controller
+ * to look at and wren.targ counts drives from there on, running over
+ * into the following controllers.  Returns 0 when the drive exists
+ * but is not ready; panics when there is no such drive.
+ */
+static Drive*
+iadev(Device *d)
+{
+	int i, j;
+	Drive *dr;
+
+	i = d->wren.ctrl;
+	j = d->wren.targ;
+
+	for(; i < niactlr; i++){
+		if(j < iactlr[i].ndrive){
+			dr = iactlr[i].drive[j];
+			if(dr->state&Dready)
+				return dr;
+			return 0;
+		}
+		j -= iactlr[i].ndrive;
+	}
+	panic("ia: bad drive %Z\n", d);
+	return 0;
+}
+
+/* one-time setup: print verb, controller discovery, console command */
+void
+iainit0(void)
+{
+	fmtinstall(L'σ', fmtσ);
+	iapnp();
+	if(niactlr > 0){
+		cmd_install("statr", "-- intel sata stats", cmd_stat);
+	}
+}
+
+void
+iainit(Device *dv)
+{
+	Drive *d;
+	vlong s;
+	char *lba;
+	static int once;
+
+	if(once++ == 0)
+		iainit0();
+
+top:
+	d = iadev(dv);
+	if(d == nil || d->secsize == 0){
+		print("\t\t"
"a%d.%d.%d not ready yet\n", dv->wren.ctrl, dv->wren.targ, dv->wren.lun); + + /* can't waitmsec(); what if no u? */ + for(int i = 0; i < 500; i++) + delay(1); + goto top; + } + + if(d->init++ == 0){ + dofilter(d->rate+Read); + dofilter(d->rate+Write); + } + + s = d->sectors; + lba = ""; + if(d->portm.feat&Dllba) + lba = "L"; + print("\t\t" "%lld sectors/%lld blocks %sLBA\n", s, s/(RBUFSIZE/d->secsize), lba); + d->lastseen = Ticks; /* hack around boot timing */ +} + + +Devsize +iasize(Device *dv) +{ + Drive *d; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 0; + + return d->sectors/(RBUFSIZE/d->secsize); +} + +int +iaread(Device *dv, Devsize b, void *c) +{ + Drive *d; + int rv; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 1; + + rv = rw(d, 0, c, RBUFSIZE, b*(RBUFSIZE/d->secsize)); + if(rv != RBUFSIZE) + return 1; + d->rate[Read].count++; + d->reads++; + d->fflag = 1; + return 0; +} + +int +iawrite(Device *dv, Devsize b, void *c) +{ + Drive *d; + int rv; + + d = iadev(dv); + if(d == nil || d->secsize == 0) + return 1; + + rv = rw(d, 1, c, RBUFSIZE, b*(RBUFSIZE/d->secsize)); + if(rv != RBUFSIZE) + return 1; + d->rate[Write].count++; + d->writes++; + d->fflag = 1; + return 0; +} --- /sys/src/fs/amd64/ahci.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/ahci.h Tue Aug 27 16:11:04 2013 @@ -0,0 +1,341 @@ +/* + * advanced host controller interface (sata) + * © 2007-9 coraid, inc + */ + +/* pci configuration */ +enum { + Abar = 5, +}; + +/* + * ahci memory configuration + * + * 0000-0023 generic host control + * 0024-009f reserved + * 00a0-00ff vendor specific. + * 0100-017f port 0 + * ... + * 1080-1100 port 31 + */ + +/* cap bits: supported features */ +enum { + H64a = 1<<31, /* 64-bit addressing */ + Hncq = 1<<30, /* ncq */ + Hsntf = 1<<29, /* snotification reg. 
*/ + Hmps = 1<<28, /* mech pres switch */ + Hss = 1<<27, /* staggered spinup */ + Halp = 1<<26, /* aggressive link pm */ + Hal = 1<<25, /* activity led */ + Hclo = 1<<24, /* command-list override */ + Hiss = 1<<20, /* for interface speed */ + Ham = 1<<18, /* ahci-mode only */ + Hpm = 1<<17, /* port multiplier */ + Hfbs = 1<<16, /* fis-based switching */ + Hpmb = 1<<15, /* multiple-block pio */ + Hssc = 1<<14, /* slumber state */ + Hpsc = 1<<13, /* partial-slumber state */ + Hncs = 1<<8, /* n command slots */ + Hcccs = 1<<7, /* coal */ + Hems = 1<<6, /* enclosure mgmt. */ + Hxs = 1<<5, /* external sata */ + Hnp = 1<<0, /* n ports */ +}; + +/* ghc bits */ +enum { + Hae = 1<<31, /* enable ahci */ + Hie = 1<<1, /* " interrupts */ + Hhr = 1<<0, /* hba reset */ +}; + +/* cap2 bits */ +enum { + Apts = 1<<2, /* automatic partial to slumber */ + Nvmp = 1<<1, /* nvmhci present; nvram */ + Boh = 1<<0, /* bios/os handoff supported */ +}; + +/* emctl bits */ +enum { + Pm = 1<<27, /* port multiplier support */ + Alhd = 1<<26, /* activity led hardware driven */ + Xonly = 1<<25, /* rx messages not supported */ + Smb = 1<<24, /* single msg buffer; rx limited */ + Esgpio = 1<<19, /* sgpio messages supported */ + Eses2 = 1<<18, /* ses-2 supported */ + Esafte = 1<<17, /* saf-te supported */ + Elmt = 1<<16, /* led msg types support */ + Emrst = 1<<9, /* reset all em logic */ + Tmsg = 1<<8, /* transmit message */ + Mr = 1<<0, /* message rx'd */ + Emtype = Esgpio | Eses2 | Esafte | Elmt, +}; + +/* bios bits */ +enum { + Bb = 1<<4, /* bios cleaning up for change */ + Ooc = 1<<3, /* os ownership change */ + Sooe = 1<<2, /* smi on ownership change enable */ + Oos = 1<<1, /* os owned semaphore */ + Bos = 1<<0, /* bios owned semaphore */ +}; + +typedef struct { + u32int cap; + u32int ghc; + u32int isr; + u32int pi; /* ports implemented */ + u32int ver; + u32int ccc; /* coaleasing control */ + u32int cccports; + u32int emloc; + u32int emctl; + u32int cap2; + u32int bios; +} Ahba; + +enum { + 
Acpds = 1<<31, /* cold port detect status */ + Atfes = 1<<30, /* task file error status */ + Ahbfs = 1<<29, /* hba fatal */ + Ahbds = 1<<28, /* hba error (parity error) */ + Aifs = 1<<27, /* interface fatal §6.1.2 */ + Ainfs = 1<<26, /* interface error (recovered) */ + Aofs = 1<<24, /* too many bytes from disk */ + Aipms = 1<<23, /* incorrect prt mul status */ + Aprcs = 1<<22, /* PhyRdy change status Pxserr.diag.n */ + Adpms = 1<<7, /* mechanical presence status */ + Apcs = 1<<6, /* port connect diag.x */ + Adps = 1<<5, /* descriptor processed */ + Aufs = 1<<4, /* unknown fis diag.f */ + Asdbs = 1<<3, /* set device bits fis received w/ i bit set */ + Adss = 1<<2, /* dma setup */ + Apio = 1<<1, /* pio setup fis */ + Adhrs = 1<<0, /* device to host register fis */ + + IEM = Acpds|Atfes|Ahbds|Ahbfs|Ahbds|Aifs|Ainfs|Aprcs|Apcs|Adps| + Aufs|Asdbs|Adss|Adhrs, + Ifatal = Atfes|Ahbfs|Ahbds|Aifs, +}; + +/* serror bits */ +enum { + SerrX = 1<<26, /* exchanged */ + SerrF = 1<<25, /* unknown fis */ + SerrT = 1<<24, /* transition error */ + SerrS = 1<<23, /* link sequence */ + SerrH = 1<<22, /* handshake */ + SerrC = 1<<21, /* crc */ + SerrD = 1<<20, /* not used by ahci */ + SerrB = 1<<19, /* 10b-to-8b decode */ + SerrW = 1<<18, /* comm wake */ + SerrI = 1<<17, /* phy internal */ + SerrN = 1<<16, /* phyrdy change */ + + ErrE = 1<<11, /* internal */ + ErrP = 1<<10, /* ata protocol violation */ + ErrC = 1<<9, /* communication */ + ErrT = 1<<8, /* transient */ + ErrM = 1<<1, /* recovered comm */ + ErrI = 1<<0, /* recovered data integrity */ + + ErrAll = ErrE|ErrP|ErrC|ErrT|ErrM|ErrI, + SerrAll = SerrX|SerrF|SerrT|SerrS|SerrH|SerrC|SerrD|SerrB|SerrW| + SerrI|SerrN|ErrAll, + SerrBad = 0x7f<<19, +}; + +/* cmd register bits */ +enum { + Aicc = 1<<28, /* interface communications control.
4 bits */ + Aasp = 1<<27, /* aggressive slumber & partial sleep */ + Aalpe = 1<<26, /* aggressive link pm enable */ + Adlae = 1<<25, /* drive led on atapi */ + Aatapi = 1<<24, /* device is atapi */ + Apste = 1<<23, /* automatic slumber to partial cap */ + Afbsc = 1<<22, /* fis-based switching capable */ + Aesp = 1<<21, /* external sata port */ + Acpd = 1<<20, /* cold presence detect */ + Ampsp = 1<<19, /* mechanical pres. */ + Ahpcp = 1<<18, /* hot plug capable */ + Apma = 1<<17, /* pm attached */ + Acps = 1<<16, /* cold presence state */ + Acr = 1<<15, /* cmdlist running */ + Afr = 1<<14, /* fis running */ + Ampss = 1<<13, /* mechanical presence switch state */ + Accs = 1<<8, /* current command slot 12:08 */ + Afre = 1<<4, /* fis enable receive */ + Aclo = 1<<3, /* command list override */ + Apod = 1<<2, /* power on dev (requires cold-pres. detect) */ + Asud = 1<<1, /* spin-up device; requires ss capability */ + Ast = 1<<0, /* start */ + + Arun = Ast|Acr|Afre|Afr, + Apwr = Apod|Asud, +}; + +/* sctl register bits */ +enum { + Aipm = 1<<8, /* interface power mgmt. 
3=off */ + Aspd = 1<<4, + Adet = 1<<0, /* device detection */ +}; + +/* sstatus register bits */ +enum{ + /* sstatus det */ + Smissing = 0<<0, + Spresent = 1<<0, + Sphylink = 3<<0, + Sbist = 4<<0, + Smask = 7<<0, + + /* sstatus speed */ + Gmissing = 0<<4, + Gi = 1<<4, + Gii = 2<<4, + Giii = 3<<4, + Gmask = 7<<4, + + /* sstatus ipm */ + Imissing = 0<<8, + Iactive = 1<<8, + Isleepy = 2<<8, + Islumber = 6<<8, + Imask = 7<<8, + + SImask = Smask | Imask, + SSmask = Smask | Isleepy, +}; + +#define sstatus scr0 +#define sctl scr2 +#define serror scr1 +#define sactive scr3 +#define ntf scr4 + +typedef struct { + u32int list; /* PxCLB must be 1kb aligned */ + u32int listhi; + u32int fis; /* 256-byte aligned */ + u32int fishi; + u32int isr; + u32int ie; /* interrupt enable */ + u32int cmd; + u32int res1; + u32int task; + u32int sig; + u32int scr0; + u32int scr2; + u32int scr1; + u32int scr3; + u32int ci; /* command issue */ + u32int scr4; + u32int fbs; + u32int res2[11]; + u32int vendor[4]; +} Aport; + +/* in host's memory; not memory mapped */ +typedef struct { + uchar *base; + uchar *d; + uchar *p; + uchar *r; + uchar *u; + u32int *devicebits; +} Afis; + +enum { + Lprdtl = 1<<16, /* physical region descriptor table len */ + Lpmp = 1<<12, /* port multiplier port */ + Lclear = 1<<10, /* clear busy on R_OK */ + Lbist = 1<<9, + Lreset = 1<<8, + Lpref = 1<<7, /* prefetchable */ + Lwrite = 1<<6, + Latapi = 1<<5, + Lcfl = 1<<0, /* command fis length in double words */ +}; + +/* in hosts memory; memory mapped */ +typedef struct { + u32int flags; + u32int len; + u32int ctab; + u32int ctabhi; + uchar reserved[16]; +} Alist; + +typedef struct { + u32int dba; + u32int dbahi; + u32int pad; + u32int count; +} Aprdt; + +typedef struct { + uchar cfis[0x40]; + uchar atapi[0x10]; + uchar pad[0x30]; + Aprdt prdt; +} Actab; + +/* enclosure message header */ +enum { + Mled = 0, + Msafte = 1, + Mses2 = 2, + Msgpio = 3, +}; + +enum { + Ledmsz = 8, +}; + +typedef struct { + uchar dummy; + uchar 
msize; + uchar dsize; + uchar type; + uchar hba; /* bits 0:4 are the port */ + uchar pm; + uchar led[2]; +} Aledmsg; + +enum { + Aled = 1<<0, + Locled = 1<<3, + Errled = 1<<6, + + Ledoff = 0, + Ledon = 1, +}; + +typedef struct { + uint encsz; + u32int *enctx; + u32int *encrx; +} Aenc; + +enum { + Ferror = 1, + Fdone = 2, +}; + +typedef struct { + QLock; + Rendez; + uchar flag; + Sfis; + Afis fis; + Alist *list; + Actab *ctab; +} Aportm; + +typedef struct { + Aport *p; + Aportm *m; +} Aportc; --- /sys/src/fs/amd64/fpu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/fpu.c Tue Aug 27 16:11:04 2013 @@ -0,0 +1,31 @@ +#include "all.h" +#include "amd64.h" +#include + +void +fpunm(Ureg *ureg, void *) +{ + panic("cpu%d: #NM: %#p", m->machno, ureg->ip); +} + +void +fpumf(Ureg *ureg, void *) +{ + panic("cpu%d: #MF: %#p", m->machno, ureg->ip); +} + +void +fpuxf(Ureg *ureg, void *) +{ + panic("cpu%d: #XF: %#p", m->machno, ureg->ip); +} + +void +fpuinit(void) +{ + if(m->machno == 0){ + trapenable(IdtNM, fpunm, 0, "#NM"); + trapenable(IdtMF, fpumf, 0, "#MF"); + trapenable(IdtXF, fpuxf, 0, "#XF"); + } +} --- /sys/src/fs/amd64/ether8169.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/ether8169.c Tue Aug 27 16:11:05 2013 @@ -0,0 +1,1100 @@ +/* + * Realtek RTL8110S/8169S. + * Mostly there. There are some magic register values used + * which are not described in any datasheet or driver but seem + * to be necessary. + * No tuning has been done. Only tested on an RTL8110S, there + * are slight differences between the chips in the series so some + * tweaks may be needed. + */ +#include "all.h" +#include "io.h" +#include "../ip/ip.h" +#include "etherif.h" +//#include "ethermii.h" + +#define dprint(...) 
print("ether 8169: " __VA_ARGS__) /* no trailing ';' here: the call site supplies it, so dprint(...) stays a single statement */ + +enum { /* registers */ + Idr0 = 0x00, /* MAC address */ + Mar0 = 0x08, /* Multicast address */ + Dtccr = 0x10, /* Dump Tally Counter Command */ + Tnpds = 0x20, /* Transmit Normal Priority Descriptors */ + Thpds = 0x28, /* Transmit High Priority Descriptors */ + Flash = 0x30, /* Flash Memory Read/Write */ + Erbcr = 0x34, /* Early Receive Byte Count */ + Ersr = 0x36, /* Early Receive Status */ + Cr = 0x37, /* Command Register */ + Tppoll = 0x38, /* Transmit Priority Polling */ + Imr = 0x3C, /* Interrupt Mask */ + Isr = 0x3E, /* Interrupt Status */ + Tcr = 0x40, /* Transmit Configuration */ + Rcr = 0x44, /* Receive Configuration */ + Tctr = 0x48, /* Timer Count */ + Mpc = 0x4C, /* Missed Packet Counter */ + Cr9346 = 0x50, /* 9346 Command Register */ + Config0 = 0x51, /* Configuration Register 0 */ + Config1 = 0x52, /* Configuration Register 1 */ + Config2 = 0x53, /* Configuration Register 2 */ + Config3 = 0x54, /* Configuration Register 3 */ + Config4 = 0x55, /* Configuration Register 4 */ + Config5 = 0x56, /* Configuration Register 5 */ + Timerint = 0x58, /* Timer Interrupt */ + Mulint = 0x5C, /* Multiple Interrupt Select */ + Phyar = 0x60, /* PHY Access */ + Tbicsr0 = 0x64, /* TBI Control and Status */ + Tbianar = 0x68, /* TBI Auto-Negotiation Advertisement */ + Tbilpar = 0x6A, /* TBI Auto-Negotiation Link Partner */ + Phystatus = 0x6C, /* PHY Status */ + + Rms = 0xDA, /* Receive Packet Maximum Size */ + Cplusc = 0xE0, /* C+ Command */ + Coal = 0xE2, /* Interrupt Mitigation (Coalesce) */ + Rdsar = 0xE4, /* Receive Descriptor Start Address */ + Etx = 0xEC, /* Early Transmit Threshold */ +}; + +enum { /* Dtccr */ + Cmd = 0x00000008, /* Command */ +}; + +enum { /* Cr */ + Te = 0x04, /* Transmitter Enable */ + Re = 0x08, /* Receiver Enable */ + Rst = 0x10, /* Software Reset */ +}; + +enum { /* Tppoll */ + Fswint = 0x01, /* Forced Software Interrupt */ + Npq = 0x40, /* Normal Priority Queue polling */ + Hpq = 0x80, /* High Priority
Queue polling */ +}; + +enum { /* Imr/Isr */ + Rok = 0x0001, /* Receive OK */ + Rer = 0x0002, /* Receive Error */ + Tok = 0x0004, /* Transmit OK */ + Ter = 0x0008, /* Transmit Error */ + Rdu = 0x0010, /* Receive Descriptor Unavailable */ + Punlc = 0x0020, /* Packet Underrun or Link Change */ + Fovw = 0x0040, /* Receive FIFO Overflow */ + Tdu = 0x0080, /* Transmit Descriptor Unavailable */ + Swint = 0x0100, /* Software Interrupt */ + Timeout = 0x4000, /* Timer */ + Serr = 0x8000, /* System Error */ +}; + +enum { /* Tcr */ + MtxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MtxdmaMASK = 0x00000700, + Mtxdmaunlimited = 0x00000700, + Acrc = 0x00010000, /* Append CRC (not) */ + Lbk0 = 0x00020000, /* Loopback Test 0 */ + Lbk1 = 0x00040000, /* Loopback Test 1 */ + Ifg2 = 0x00080000, /* Interframe Gap 2 */ + HwveridSHIFT = 23, /* Hardware Version ID */ + HwveridMASK = 0x7C800000, + Macv01 = 0x00000000, /* RTL8169 */ + Macv02 = 0x00800000, /* RTL8169S/8110S */ + Macv03 = 0x04000000, /* RTL8169S/8110S */ + Macv04 = 0x10000000, /* RTL8169SB/8110SB */ + Macv05 = 0x18000000, /* RTL8169SC/8110SC */ + Macv07 = 0x24800000, /* RTL8102e */ + Macv07a = 0x34800000, /* RTL8102e */ + Macv11 = 0x30000000, /* RTL8168B/8111B */ + Macv12 = 0x38000000, /* RTL8169B/8111B */ + Macv13 = 0x34000000, /* RTL8101E */ + Macv14 = 0x30800000, /* RTL8100E */ + Macv15 = 0x38800000, /* RTL8100E */ + Macv19 = 0x3c000000, /* RTL8111c-gr */ + Macv25 = 0x28000000, /* RTL8168D */ + Macv26 = 0x48000000, /* RTL8111/8168B */ + Macv27 = 0x2c800000, /* RTL8111e */ + Macv28 = 0x2c000000, /* RTL8111/8168B */ + Macv29 = 0x40800000, /* RTL8101/8102E */ + Ifg0 = 0x01000000, /* Interframe Gap 0 */ + Ifg1 = 0x02000000, /* Interframe Gap 1 */ +}; + +enum { /* Rcr */ + Aap = 0x00000001, /* Accept All Packets */ + Apm = 0x00000002, /* Accept Physical Match */ + Am = 0x00000004, /* Accept Multicast */ + Ab = 0x00000008, /* Accept Broadcast */ + Ar = 0x00000010, /* Accept Runt */ + Aer = 0x00000020, /* Accept Error */ + Sel9356 = 
0x00000040, /* 9356 EEPROM used */ + MrxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MrxdmaMASK = 0x00000700, + Mrxdmaunlimited = 0x00000700, + RxfthSHIFT = 13, /* Receive Buffer Length */ + RxfthMASK = 0x0000E000, + Rxfth256 = 0x00008000, + Rxfthnone = 0x0000E000, + Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */ + MulERINT = 0x01000000, /* Multiple Early Interrupt Select */ +}; + +enum { /* Cr9346 */ + Eedo = 0x01, /* */ + Eedi = 0x02, /* */ + Eesk = 0x04, /* */ + Eecs = 0x08, /* */ + Eem0 = 0x40, /* Operating Mode */ + Eem1 = 0x80, +}; + +enum { /* Phyar */ + DataMASK = 0x0000FFFF, /* 16-bit GMII/MII Register Data */ + DataSHIFT = 0, + RegaddrMASK = 0x001F0000, /* 5-bit GMII/MII Register Address */ + RegaddrSHIFT = 16, + PhyFlag = 0x80000000, /* */ +}; + +enum { /* Phystatus */ + Fd = 0x01, /* Full Duplex */ + Linksts = 0x02, /* Link Status */ + Speed10 = 0x04, /* */ + Speed100 = 0x08, /* */ + Speed1000 = 0x10, /* */ + Rxflow = 0x20, /* */ + Txflow = 0x40, /* */ + Entbi = 0x80, /* */ +}; + +enum { /* Cplusc */ + Mulrw = 0x0008, /* PCI Multiple R/W Enable */ + Dac = 0x0010, /* PCI Dual Address Cycle Enable */ + Rxchksum = 0x0020, /* Receive Checksum Offload Enable */ + Rxvlan = 0x0040, /* Receive VLAN De-tagging Enable */ + Endian = 0x0200, /* Endian Mode */ +}; + +typedef struct D D; /* Transmit/Receive Descriptor */ +struct D { + u32int control; + u32int vlan; + u32int addrlo; + u32int addrhi; +}; + +enum { /* Transmit Descriptor control */ + TxflMASK = 0x0000FFFF, /* Transmit Frame Length */ + TxflSHIFT = 0, + Tcps = 0x00010000, /* TCP Checksum Offload */ + Udpcs = 0x00020000, /* UDP Checksum Offload */ + Ipcs = 0x00040000, /* IP Checksum Offload */ + Lgsen = 0x08000000, /* TSO; WARNING: contains lark's vomit */ +}; + +enum { /* Receive Descriptor control */ + RxflMASK = 0x00001FFF, /* Receive Frame Length */ + Tcpf = 0x00004000, /* TCP Checksum Failure */ + Udpf = 0x00008000, /* UDP Checksum Failure */ + Ipf = 0x00010000, /* IP Checksum Failure */ + Pid0 
= 0x00020000, /* Protocol ID0 */ + Pid1 = 0x00040000, /* Protocol ID1 */ + Crce = 0x00080000, /* CRC Error */ + Runt = 0x00100000, /* Runt Packet */ + Res = 0x00200000, /* Receive Error Summary */ + Rwt = 0x00400000, /* Receive Watchdog Timer Expired */ + Fovf = 0x00800000, /* FIFO Overflow */ + Bovf = 0x01000000, /* Buffer Overflow */ + Bar = 0x02000000, /* Broadcast Address Received */ + Pam = 0x04000000, /* Physical Address Matched */ + Mar = 0x08000000, /* Multicast Address Received */ +}; + +enum { /* General Descriptor control */ + Ls = 0x10000000, /* Last Segment Descriptor */ + Fs = 0x20000000, /* First Segment Descriptor */ + Eor = 0x40000000, /* End of Descriptor Ring */ + Own = 0x80000000, /* Ownership */ +}; + +/* + */ +enum { /* Ring sizes (<= 1024) */ + Ntd = 64, /* Transmit Ring */ + Nrd = 256, /* Receive Ring */ + + Stdbuf = 1536, + Mtu = 7000, /* performance limited */ + Mps = Mtu + 8 + 14, /* if(mtu>ETHERMAXTU) */ +// Mps = ROUNDUP(ETHERMAXTU+4, 128), +}; + +typedef struct Dtcc Dtcc; +struct Dtcc { + u64int txok; + u64int rxok; + u64int txer; + u32int rxer; + u16int misspkt; + u16int fae; + u32int tx1col; + u32int txmcol; + u64int rxokph; + u64int rxokbrd; + u32int rxokmu; + u16int txabt; + u16int txundrn; +}; + +enum { /* Variants */ + Rtl8100e = (0x8136<<16)|0x10EC, /* RTL810[01]E: pci -e */ + Rtl8169c = (0x0116<<16)|0x16EC, /* RTL8169C+ (USR997902) */ + Rtl8169sc = (0x8167<<16)|0x10EC, /* RTL8169SC */ + Rtl8168b = (0x8168<<16)|0x10EC, /* RTL8168B: pci-e */ + Rtl8169 = (0x8169<<16)|0x10EC, /* RTL8169 */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + int port; + Pcidev* pcidev; + Ctlr* next; + int active; + + QLock alock; /* attach */ + Lock ilock; /* init */ + int init; /* */ + + int pciv; /* */ + int macv; /* MAC version */ + int phyv; /* PHY version */ + int pcie; /* flag: pci-express device? 
*/ + +// Mii* mii; + + Lock tlock; /* transmit */ + D* td; /* descriptor ring */ + Msgbuf** tb; /* transmit buffers */ + int ntd; + + int tdh; /* head - producer index (host) */ + int tdt; /* tail - consumer index (NIC) */ + int ntdfree; + int ntq; + + Lock rlock; /* receive */ + D* rd; /* descriptor ring */ + Msgbuf** rb; /* receive buffers */ + int nrd; + + int rdh; /* head - producer index (NIC) */ + int rdt; /* tail - consumer index (host) */ + int nrdfree; + + int tcr; /* transmit configuration register */ + int rcr; /* receive configuration register */ + int imr; + + QLock slock; /* statistics */ + Dtcc* dtcc; + uint txdu; + uint tcpf; + uint udpf; + uint ipf; + uint fovf; + uint ierrs; + uint rer; + uint rdu; + uint punlc; + uint fovw; +} Ctlr; + +static Ctlr* rtl8169ctlrhead; +static Ctlr* rtl8169ctlrtail; + +#define csr8r(c, r) (inb((c)->port+(r))) +#define csr16r(c, r) (ins((c)->port+(r))) +#define csr32r(c, r) (inl((c)->port+(r))) +#define csr8w(c, r, b) (outb((c)->port+(r), (u8int)(b))) +#define csr16w(c, r, w) (outs((c)->port+(r), (u16int)(w))) +#define csr32w(c, r, l) (outl((c)->port+(r), (u32int)(l))) + +#ifdef notdef +static int +·rtl8169miimir(Ctlr *ctlr, int pa, int ra) +{ + uint r; + int timeo; + + assert(pa == 1); + r = (ra<<16) & RegaddrMASK; + csr32w(ctlr, Phyar, r); + delay(1); + for(timeo = 0; timeo < 2000; timeo++){ + if((r = csr32r(ctlr, Phyar)) & Flag) + break; + microdelay(100); + } + if(!(r & Flag)) + return -1; + + return (r & DataMASK)>>DataSHIFT; +} + +static int +rtl8169miimir(Mii *mii, int pa, int ra) +{ + if(pa != 1) + return -1; + return ·rtl8169miimir(mii->ctlr, pa, ra); +} + +static int +·rtl8169miimiw(Ctlr *ctlr, int pa, int ra, int data) +{ + uint r; + int timeo; + + assert(pa == 1); + r = Flag|((ra<<16) & RegaddrMASK)|((data<ctlr, pa, ra, data); +} + +static Mii* +rtl8169mii(Ctlr* ctlr) +{ + Mii* mii; + MiiPhy *phy; + + /* + * Link management. + * + * Get rev number out of Phyidr2 so can config properly. 
+ * There's probably more special stuff for Macv0[234] needed here. + */ + ctlr->phyv = ·rtl8169miimir(ctlr, 1, Phyidr2) & 0x0F; + if(ctlr->macv == Macv02){ + csr8w(ctlr, 0x82, 1); /* magic */ + ·rtl8169miimiw(ctlr, 1, 0x0B, 0x0000); /* magic */ + } + if((mii = miiattach(ctlr, (1<<1), rtl8169miimir, rtl8169miimiw)) == nil) + return nil; + + phy = mii->curphy; + switch(ctlr->macv){ + case Macv28: + rtl8169miimiw(ctlr->mii, 1, 0x1f, 0); /* power up phy */ + rtl8169miimiw(ctlr->mii, 1, 0x1e, 0); + } + dprint("oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n", + phy->oui, phy->phyno, ctlr->macv, ctlr->phyv); + + if(miistatus(mii) < 0){ + miireset(mii); + miiane(mii, ~0, ~0, ~0); + } + + return mii; +} + +#endif + +static void +rtl8169halt(Ctlr* ctlr) +{ + csr8w(ctlr, Cr, 0); + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); +} + +static int +rtl8169reset(Ctlr* ctlr) +{ + u32int r; + int timeo; + + /* + * Soft reset the controller. + */ + csr8w(ctlr, Cr, Rst); + for(r = timeo = 0; timeo < 1000; timeo++){ + r = csr8r(ctlr, Cr); + if(!(r & Rst)) + break; + delay(1); + } + rtl8169halt(ctlr); + + if(r & Rst) + return -1; + return 0; +} + +static void +rtl8169replenish(Ctlr* ctlr) +{ + D *d; + int rdt; + Msgbuf *bp; + + rdt = ctlr->rdt; + while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){ + d = &ctlr->rd[rdt]; + if(ctlr->rb[rdt] == nil){ + /* + * Simple allocation for now. + * This better be aligned on 8. + */ + bp = mballoc(Mps, 0, Mbeth1); + if(bp == nil){ + print("no available buffers\n"); + break; + } + ctlr->rb[rdt] = bp; + d->addrlo = Pciwaddrl(bp->data); + d->addrhi = Pciwaddrh(bp->data); + }else + print("i8169: rx overrun\n"); + coherence(); + d->control |= Own|Mtu; + rdt = NEXT(rdt, ctlr->nrd); + ctlr->nrdfree++; + } + ctlr->rdt = rdt; +} + +static int +rtl8169init(Ether* edev) +{ + int i; + u32int r; + Msgbuf *bp; + Ctlr *ctlr; + u16int cplusc; /* holds the 16-bit Cplusc value (csr16r/csr16w); a u8int would drop the 1<<14 bit */ + + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + rtl8169halt(ctlr); + + /* + * MAC Address is not settable on some (all?) chips.
+ * Must put chip into config register write enable mode. + */ + csr8w(ctlr, Cr9346, Eem1|Eem0); + + /* + * Transmitter. + */ + memset(ctlr->td, 0, sizeof(D)*ctlr->ntd); + ctlr->tdh = ctlr->tdt = 0; + ctlr->td[ctlr->ntd-1].control = Eor; + + /* + * Receiver. + * Need to do something here about the multicast filter. + */ + memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd); + ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0; + ctlr->rd[ctlr->nrd-1].control = Eor; + + for(i = 0; i < ctlr->nrd; i++) + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + mbfree(bp); + } + rtl8169replenish(ctlr); + ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm; + + /* + * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst + * settings in Tcr/Rcr; the (1<<14) is magic. + */ + cplusc = csr16r(ctlr, Cplusc) & ~(1<<14); + cplusc |= Rxchksum | Mulrw; + switch(ctlr->macv){ + default: + panic("8169init: unknown macv: %.8ux", ctlr->macv); + case Macv01: + break; + case Macv02: + case Macv03: + cplusc |= 1<<14; /* magic */ + break; + case Macv05: + /* + * This is interpreted from clearly bogus code + * in the manufacturer-supplied driver, it could + * be wrong. Untested. + */ + r = csr8r(ctlr, Config2) & 0x07; + if(r == 0x01) /* 66MHz PCI */ + csr32w(ctlr, 0x7C, 0x0007FFFF); /* magic */ + else + csr32w(ctlr, 0x7C, 0x0007FF00); /* magic */ + pciclrmwi(ctlr->pcidev); + break; + case Macv13: + /* + * This is interpreted from clearly bogus code + * in the manufacturer-supplied driver, it could + * be wrong. Untested. + */ + pcicfgw8(ctlr->pcidev, 0x68, 0x00); /* magic */ + pcicfgw8(ctlr->pcidev, 0x69, 0x08); /* magic */ + break; + case Macv04: + case Macv07: + case Macv07a: + case Macv11: + case Macv12: + case Macv14: + case Macv15: + case Macv19: + case Macv25: + case Macv26: + case Macv27: + case Macv28: + case Macv29: + break; + } + + /* + * Enable receiver/transmitter. + * Need to do this first or some of the settings below + * won't take. 
+ */ + switch(ctlr->pciv){ + default: + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + csr32w(ctlr, Mar0, 0); + csr32w(ctlr, Mar0+4, 0); + case Rtl8169sc: + case Rtl8168b: + break; + } + + /* + * Interrupts. + * Disable Tdu|Tok for now, the transmit routine will tidy. + * Tdu means the NIC ran out of descriptors to send, so it + * doesn't really need to ever be on. + */ + csr32w(ctlr, Timerint, 0); + ctlr->imr = Serr|Timeout|Fovw|Punlc|Rdu|Ter|Rer|Rok; + csr16w(ctlr, Imr, ctlr->imr); + + /* + * Clear missed-packet counter; + * clear early transmit threshold value; + * set the descriptor ring base addresses; + * set the maximum receive packet size; + * no early-receive interrupts. + * + * note: the maximum rx size is a filter. the size of the buffer + * in the descriptor ring is still honored. we will toss >Mtu + * packets because they've been fragmented into multiple + * rx buffers. + */ + csr32w(ctlr, Mpc, 0); + csr8w(ctlr, Etx, 0x3f); + csr32w(ctlr, Tnpds+4, Pciwaddrh(ctlr->td)); + csr32w(ctlr, Tnpds, Pciwaddrl(ctlr->td)); + csr32w(ctlr, Rdsar+4, Pciwaddrh(ctlr->rd)); + csr32w(ctlr, Rdsar, Pciwaddrl(ctlr->rd)); + csr16w(ctlr, Rms, Mtu); /* was Mps; see above comment */ + r = csr16r(ctlr, Mulint) & 0xF000; /* no early rx interrupts */ + csr16w(ctlr, Mulint, r); + csr16w(ctlr, Cplusc, cplusc); + csr16w(ctlr, Coal, 0); + + /* + * Set configuration. 
+ */ + switch(ctlr->pciv){ + case Rtl8169sc: + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + break; + case Rtl8168b: + case Rtl8169c: + csr16w(ctlr, Cplusc, 0x2000); /* magic */ + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + break; + } + ctlr->tcr = csr32r(ctlr, Tcr); + csr8w(ctlr, Cr9346, 0); + + iunlock(&ctlr->ilock); + +// rtl8169mii(ctlr); + + return 0; +} + +static void +rtl8169attach(Ether* edev) +{ +// int timeo, firsta; + Ctlr *ctlr; +// MiiPhy *phy; + + ctlr = edev->ctlr; +// firsta = 0; + qlock(&ctlr->alock); + if(ctlr->init == 0){ + /* + * Handle allocation/init errors here. + */ + ctlr->td = ialloc(sizeof(D)*Ntd, 256); + ctlr->tb = ialloc(Ntd*sizeof(Msgbuf*), 0); + ctlr->ntd = Ntd; + ctlr->rd = ialloc(sizeof(D)*Nrd, 256); + ctlr->rb = ialloc(Nrd*sizeof(Msgbuf*), 0); + ctlr->nrd = Nrd; + ctlr->dtcc = ialloc(sizeof(Dtcc), 64); + rtl8169init(edev); + ctlr->init = 1; +// firsta = 1; + } + qunlock(&ctlr->alock); + + /* + * Wait for link to be ready. why here? + */ +#ifdef notdef + if(firsta){ + for(timeo = 0; timeo < 350; timeo += 10){ + if(miistatus(ctlr->mii) == 0) + break; + tsleep(&up->sleep, return0, 0, 10); + } + phy = ctlr->mii->curphy; + dprint("%s: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, phy->rfc, phy->tfc); + } +#endif +} + +static void +rtl8169link(Ether* edev) +{ + USED(edev); + return; +#ifdef notdef + int limit; + Ctlr *ctlr; + MiiPhy *phy; + + ctlr = edev->ctlr; + + /* + * Maybe the link changed - do we care very much? + * Could stall transmits if no link, maybe? 
+ */ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) + return; + + phy = ctlr->mii->curphy; + if(miistatus(ctlr->mii) < 0){ + dprint("%slink n: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + edev->link = 0; + return; + } + edev->link = 1; + + limit = 256*1024; + if(phy->speed == 10){ + edev->mbps = 10; + limit = 65*1024; + } + else if(phy->speed == 100) + edev->mbps = 100; + else if(phy->speed == 1000) + edev->mbps = 1000; + dprint("%slink y: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + + if(edev->oq != nil) + qsetlimit(edev->oq, limit); +#endif +} + +static void +rtl8169transmit(Ether* edev) +{ + D *d; + Msgbuf *bp; + Ctlr *ctlr; + int control, x; + + ctlr = edev->ctlr; + + ilock(&ctlr->tlock); + for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){ + d = &ctlr->td[x]; + if((control = d->control) & Own) + break; + + /* + * Check errors and log here. + */ + USED(control); + + /* + * Free it up. + * Need to clean the descriptor here? Not really. + * Simple freeb for now (no chain and freeblist). + * Use ntq count for now. 
+ */ + mbfree(ctlr->tb[x]); + ctlr->tb[x] = nil; + d->control &= Eor; + + ctlr->ntq--; + } + ctlr->tdh = x; + + x = ctlr->tdt; + while(ctlr->ntq < (ctlr->ntd-1)){ + if((bp = etheroq(edev)) == nil) + break; + + d = &ctlr->td[x]; + d->addrlo = Pciwaddrl(bp->data); + d->addrhi = Pciwaddrh(bp->data); + ctlr->tb[x] = bp; + coherence(); + d->control |= Own|Fs|Ls|bp->count; + + x = NEXT(x, ctlr->ntd); + ctlr->ntq++; + } + if(x != ctlr->tdt){ + ctlr->tdt = x; + csr8w(ctlr, Tppoll, Npq); + } + else if(ctlr->ntq >= (ctlr->ntd-1)) + ctlr->txdu++; + + iunlock(&ctlr->tlock); +} + +/* + * Walk the receive ring from rdh until a descriptor with Own still set is found. + * A descriptor with Fs and Ls set and Res clear is a complete frame: its buffer + * is detached from rb[] and handed to etheriq. Any other descriptor has its + * buffer freed and its rb[] slot cleared so rtl8169replenish allocates a new one. + */ +static void +rtl8169receive(Ether* edev) +{ + D *d; + int rdh; + Msgbuf *bp; + Ctlr *ctlr; + u32int control; + + ctlr = edev->ctlr; + + rdh = ctlr->rdh; + for(;;){ + d = &ctlr->rd[rdh]; + + if(d->control & Own) + break; + + control = d->control; + if((control & (Fs|Ls|Res)) == (Fs|Ls)){ + bp = ctlr->rb[rdh]; + ctlr->rb[rdh] = nil; + bp->count = (control & RxflMASK)-4; + bp->next = nil; + + if(control & Fovf) + ctlr->fovf++; + + switch(control & (Pid1|Pid0)){ + default: + break; + case Pid0: + if(control & Tcpf){ + ctlr->tcpf++; + break; + } + bp->flags |= Btcpck; + break; + case Pid1: + if(control & Udpf){ + ctlr->udpf++; + break; + } + bp->flags |= Budpck; + break; + case Pid1|Pid0: + if(control & Ipf){ + ctlr->ipf++; + break; + } + bp->flags |= Bipck; + break; + } + etheriq(edev, bp); + } + else{ + // if(!(control & Res)) + // ctlr->frag++; + /* iprint("i8169: control %#.8ux\n", control); */ + mbfree(ctlr->rb[rdh]); + /* forget the freed buffer; otherwise replenish re-arms the descriptor with it */ + ctlr->rb[rdh] = nil; + } + d->control &= Eor; + ctlr->nrdfree--; + rdh = NEXT(rdh, ctlr->nrd); + + if(ctlr->nrdfree < ctlr->nrd/2) + rtl8169replenish(ctlr); + } + ctlr->rdh = rdh; +} + +static void +rtl8169interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + u32int isr; + + edev = arg; + ctlr = edev->ctlr; + + while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){ + csr16w(ctlr, Isr, isr); + if((isr & ctlr->imr) == 0) + break; + if(isr & (Fovw|Punlc|Rdu|Rer|Rok)){ + rtl8169receive(edev); + if(!(isr
& (Punlc|Rok))) + ctlr->ierrs++; + if(isr & Rer) + ctlr->rer++; + if(isr & Rdu) + ctlr->rdu++; + if(isr & Punlc) + ctlr->punlc++; + if(isr & Fovw) + ctlr->fovw++; + isr &= ~(Fovw|Rdu|Rer|Rok); + } + + if(isr & (Tdu|Ter|Tok)){ + rtl8169transmit(edev); + isr &= ~(Tdu|Ter|Tok); + } + + if(isr & Punlc){ + rtl8169link(edev); + isr &= ~Punlc; + } + + /* + * Some of the reserved bits get set sometimes... + */ + if(isr & (Serr|Timeout|Tdu|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok)) + panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux", + csr16r(ctlr, Imr), isr); + } +} + +int +vetmacv(Ctlr *ctlr, uint *macv) +{ + *macv = csr32r(ctlr, Tcr) & HwveridMASK; + switch(*macv){ + default: + return -1; + case Macv01: + case Macv02: + case Macv03: + case Macv04: + case Macv05: + case Macv07: + case Macv07a: + case Macv11: + case Macv12: + case Macv13: + case Macv14: + case Macv15: + case Macv19: + case Macv25: + case Macv26: + case Macv27: + case Macv28: + case Macv29: + break; + } + return 0; +} + +static void +rtl8169pci(void) +{ + Pcidev *p; + Ctlr *ctlr; + int i, port, pcie; + uint macv; + + p = nil; + while(p = pcimatch(p, 0, 0)){ + pcie = 0; + switch(i = ((p->did<<16)|p->vid)){ + default: + continue; + case Rtl8100e: /* RTL810[01]E ? 
*/ + case Rtl8168b: /* RTL8168B */ + pcie = 1; + break; + case Rtl8169c: /* RTL8169C */ + case Rtl8169sc: /* RTL8169SC */ + case Rtl8169: /* RTL8169 */ + break; + case (0xC107<<16)|0x1259: /* Corega CG-LAPCIGT */ + i = Rtl8169; + break; + } + + port = p->mem[0].bar & ~0x01; +// if(ioalloc(port, p->mem[0].size, 0, "rtl8169") < 0){ +// print("rtl8169: port %#ux in use\n", port); +// continue; +// } + + ctlr = ialloc(sizeof(Ctlr), 0); + ctlr->port = port; + ctlr->pcidev = p; + ctlr->pciv = i; + ctlr->pcie = pcie; + + if(vetmacv(ctlr, &macv) == -1){ +// iofree(port); +// free(ctlr); + print("rtl8169: unknown mac %.4ux %.8ux\n", p->did, macv); + continue; + } + +#ifdef notdef + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + } +#endif + + if(rtl8169reset(ctlr)){ +// iofree(port); +// free(ctlr); + continue; + } + + /* + * Extract the chip hardware version, + * needed to configure each properly. + */ + ctlr->macv = macv; + +// rtl8169mii(ctlr); + + pcisetbme(p); + + if(rtl8169ctlrhead != nil) + rtl8169ctlrtail->next = ctlr; + else + rtl8169ctlrhead = ctlr; + rtl8169ctlrtail = ctlr; + } +} + +int +rtl8169pnp(Ether* edev) +{ + u32int r; + Ctlr *ctlr; + uchar ea[Easize]; + static int once; + + if(once == 0){ + once = 1; + rtl8169pci(); + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 100; + switch(ctlr->macv){ + case Macv28: + edev->ifc.maxmtu = 1514; + break; + default: + edev->ifc.maxmtu = Mtu; + } + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the device and set in edev->ea. + */ + memset(ea, 0, Easize); + if(memcmp(ea, edev->ea, Easize) == 0){ + r = csr32r(ctlr, Idr0); + edev->ea[0] = r; + edev->ea[1] = r>>8; + edev->ea[2] = r>>16; + edev->ea[3] = r>>24; + r = csr32r(ctlr, Idr0+4); + edev->ea[4] = r; + edev->ea[5] = r>>8; + } + + edev->attach = rtl8169attach; + edev->transmit = rtl8169transmit; + edev->interrupt = rtl8169interrupt; + rtl8169link(edev); + + return 0; +} --- /sys/src/fs/amd64/acpi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/amd64/acpi.c Tue Aug 27 16:11:05 2013 @@ -0,0 +1,572 @@ +#include "all.h" +#include "io.h" +#include "mp.h" +#include "apic.h" +#include + +typedef struct Rsd Rsd; +typedef struct Tbl Tbl; + +struct Rsd { + uchar sig[8]; + uchar csum; + uchar oemid[6]; + uchar rev; + uchar raddr[4]; + uchar len[4]; + uchar xaddr[8]; + uchar xcsum; + uchar reserved[3]; +}; + +struct Tbl { + uchar sig[4]; + uchar len[4]; + uchar rev; + uchar csum; + uchar oemid[6]; + uchar oemtid[8]; + uchar oemrev[4]; + uchar cid[4]; + uchar crev[4]; + uchar data[]; +}; + +enum { + Tblsz = 4+4+1+1+6+8+4+4+4, + Rdsz = 8+1+6+1+4+4+8+1+3, +}; + +static Rsd *rsd; +static int ntblpa; /* physical addresses visited by maptable() */ +static uintmem tblpa[64]; +static int ntblmap; /* successfully mapped tables */ +static Tbl *tblmap[64]; + +#define DBG(...) 
if(0){print(__VA_ARGS__);}while(0) + +static int +checksum(void *v, int n) +{ + uchar *p, s; + + s = 0; + p = v; + while(n-- > 0) + s += *p++; + return s; +} + +/* + * typical leakage ~40kb. + */ +void* +amlalloc(usize n) +{ + void *p; + + if((p = ialloc(n, 0)) != nil){ + // setmalloctag(&p, getcallerpc(&n)); + // setrealloctag(&p, 0); + } + return p; +} + +void +amlfree(void *p) +{ + USED(p); +// free(p); +} + +#define get16(p) getle((p), 2) +#define get32(p) getle((p), 4) +#define get64(p) getle((p), 8) + +extern uvlong getle(uchar*, int); + +static uint +tbldlen(Tbl *t) +{ + return get32(t->len) - Tblsz; +} + +static Tbl* +findtable(void *sig) +{ + int i; + for(i=0; isig, sig, 4) == 0) + return tblmap[i]; + return nil; +} + + +/* argument is uvlong to prevent caller from caring */ +static void +maptable(uvlong xpa) +{ + uchar *p, *e; + int i; + uintmem pa; + u32int l; + Tbl *t; + + pa = xpa; + if(pa != xpa || pa == 0) + return; + if(ntblpa >= nelem(tblpa) || ntblmap >= nelem(tblmap)) + return; + for(i=0; ilen); + if(l < Tblsz){ + vunmap(t, 8); + return; + } + vunmap(t, 8); + if((t = vmap(pa, l)) == nil) + return; + if(checksum(t, l)){ + vunmap(t, l); + return; + } + tblmap[ntblmap++] = t; + + p = (uchar*)t; + e = p + l; + if(memcmp("RSDT", t->sig, 4) == 0){ + for(p = t->data; p+3 < e; p += 4) + maptable(get32(p)); + return; + } + if(memcmp("XSDT", t->sig, 4) == 0){ + for(p = t->data; p+7 < e; p += 8) + maptable(get64(p)); + return; + } + if(memcmp("FACP", t->sig, 4) == 0){ + if(l < 44) + return; + maptable(get32(p + 40)); + if(l < 148) + return; + maptable(get64(p + 140)); + return; + } +} + +static void* +rsdscan(uchar* addr, int len, char* signature) +{ + int sl; + uchar *e, *p; + + e = addr+len; + sl = strlen(signature); + for(p = addr; p+sl < e; p += 16){ + if(memcmp(p, signature, sl)) + continue; + return p; + } + + return nil; +} + +static void* +rsdsearch(char* signature) +{ + uintptr p; + uchar *bda; + Rsd *rsd; + + /* + * Search for the data structure 
signature: + * 1) in the first KB of the EBDA; + * 2) in the BIOS ROM between 0xE0000 and 0xFFFFF. + */ + if(strncmp((char*)KADDR(0xFFFD9), "EISA", 4) == 0){ + bda = BIOSSEG(0x40); + if((p = (bda[0x0F]<<8)|bda[0x0E])){ + if(rsd = rsdscan(KADDR(p), 1024, signature)) + return rsd; + } + } + return rsdscan(BIOSSEG(0xE000), 0x20000, signature); +} + +static void +loadrsd(void) +{ + if((rsd = rsdsearch("RSD PTR ")) == nil) + panic("acpi: no rsd ptr"); + if(checksum(rsd, 20) && checksum(rsd, 36)) + panic("acpi: acpi checksum"); +} + +static void +maptables(void) +{ + loadrsd(); + if(ntblmap > 0 || ntblpa > 0) + return; + if(!checksum(rsd, 20)) + maptable(get32(rsd->raddr)); + if(rsd->rev >= 2) + if(!checksum(rsd, 36)) + maptable(get64(rsd->xaddr)); +} + +#define Lintr Localintr +enum { + Iointr, + Lintr, + + MTint = 0, /* fake interrupt type, equivalent to fixed */ +}; + +static u32int +apicmkintr(uint src, uint inttype, int polarity, int trigger, uint apicno, uint intin) +{ + u32int v; + Apic *apic; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. 
+ */ + if(apicno != 0xff){ + if(Napic < 256 && apicno >= Napic){ + print("apic: id out-of-range: %d\n", apicno); + return 0; + } + switch(src){ + default: + print("apic: intin botch: %d\n", intin); + return 0; + case Iointr: + if((apic = ioapiclookup(apicno)) == nil){ + print("ioapic%d: ioapic unusable\n", apicno); + return 0; + } + if(intin >= apic->nrdt){ + print("ioapic%d: intin %d >= nrdt %d\n", apicno, intin, apic->nrdt); + return 0; + } + break; + case Lintr: + if((apic = lapiclookup(apicno)) == nil){ + print("lapic%d: lapic unusable\n", apicno); + return 0; + } + if(intin >= nelem(apic->lvt)){ + print("lapic%d: intin beyond lvt: %d\n", apicno, intin); + return 0; + } + USED(apic); + break; + } + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(inttype){ + default: + print("apic: bad irq type %d\n", inttype); + return 0; + case MTint: /* INT (fake type, same as fixed) */ + v |= polarity | trigger; + break; + case MTnmi: /* NMI */ + case MTsmi: /* SMI */ + case MTei: /* ExtINT */ + v |= TMedge|IPhigh|inttype; + break; + } + + return v; +} + +int +flagstopolarity(int bustype, int flags) +{ + switch(flags & 3){ + case 1: + return IPhigh; + case 3: + return IPlow; + case 2: + return -1; + } + switch(bustype){ + case BusISA: + return IPhigh; + case BusPCI: + return IPlow; + break; + default: + return -1; + } +} + +int +flagstotrigger(int bustype, int flags) /* trigger-mode flags (or, when unset, the bus default) -> TMedge/TMlevel; -1 for field value 2 or an unknown bus */ +{ + switch((flags>>2) & 3){ /* ACPI MPS INTI flags: trigger mode is bits 3:2 (bits 1:0 are polarity); shifting by 3 decoded level (3) as edge (1) */ + case 1: + return TMedge; + case 3: + return TMlevel; + case 2: + return -1; + } + switch(bustype){ /* field was 0: fall back to the bus's own convention */ + case BusISA: + return TMedge; + case BusPCI: + return TMlevel; + break; + default: + return -1; + } +} + +static void +addirq(int gsi, int bustype, int busno, int irq, int flags) +{ + uint apicno, intin, polarity, trigger; + u32int i; + + if((apicno = gsitoapicid(gsi, &intin)) == -1){ + print("acpi: addirq: no apic for gsi %d bus %d.%d\n", gsi, bustype, busno); + return; + } + DBG("addirq: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI? "pci": "isa", busno, irq, flags); + polarity = flagstopolarity(bustype, flags); + trigger = flagstotrigger(bustype, flags); + if(polarity == -1 || trigger == -1){ + print("addirq: bad polarity: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI?
"pci": "isa", busno, irq, flags); + return; + } + + i = apicmkintr(Iointr, MTint, polarity, trigger, apicno, intin); +#ifndef MPS + ioapicintrinit(bustype, busno, apicno, intin, irq, i); +#endif +} + +static char* +eisaid(void *v) +{ + uint b, l; + int i; + static char id[8]; + + if(amltag(v) == 's') + return v; + b = amlint(v); + for(l = 0, i=24; i>=0; i -= 8, b >>= 8) + l |= (b & 0xFF) << i; + id[7] = 0; + for(i=6; i>=3; i--, l >>= 4) + id[i] = "0123456789ABCDEF"[l & 0xF]; + for(i=2; i>=0; i--, l >>= 5) + id[i] = '@' + (l & 0x1F); + return id; +} + +static int +pcibusno(void *dot) +{ + int bno, adr, tbdf; + Pcidev *pdev; + void *p, *x; + char *id; + + id = nil; + if(x = amlwalk(dot, "^_HID")){ + p = nil; + if(amleval(x, "", &p) == 0) + id = eisaid(p); + } + if((x = amlwalk(dot, "^_BBN")) == nil) + if((x = amlwalk(dot, "^_ADR")) == nil) + return -1; + p = nil; + if(amleval(x, "", &p) < 0) + return -1; + adr = amlint(p); + /* if root bridge, then we are done here */ + if(id != nil && (strcmp(id, "PNP0A03")==0 || strcmp(id, "PNP0A08")==0)) + return adr; + x = amlwalk(dot, "^"); + if(x == nil || x == dot) + return -1; + if((bno = pcibusno(x)) < 0) + return -1; + tbdf = MKBUS(BusPCI, bno, adr>>16, adr&0xFFFF); + pdev = pcimatchtbdf(tbdf); + if(pdev == nil){ + DBG("acpi: pcibusno: bridge not found: %τ\n", tbdf); + return -1; + } + if(pdev->bridge == nil){ + DBG("acpi: pcibusno: nothing bridged: %τ\n", tbdf); + return -1; + } + return BUSBNO(pdev->bridge->tbdf); +} + +static int +enumprt(void *dot, void *) +{ + void *p, **a, **b; + int bno, dno, pin, gsi; + int n, i; + + bno = pcibusno(dot); + if(bno < 0){ + DBG("enumprt: pci not found %V\n", dot); + return 1; + } + + /* evalulate _PRT method */ + p = nil; + if(amleval(dot, "", &p) < 0) + return 1; + if(amltag(p) != 'p') + return 1; + + n = amllen(p); + a = amlval(p); + for(i=0; i>16; + pin = amlint(b[1]); + if(amltag(b[2]) == 'N' || amlint(b[2]) != 0){ + print("enumprt: interrupt link not handled %V\n", b[2]); + 
continue; + } + gsi = amlint(b[3]); + addirq(gsi, BusPCI, bno, (dno<<2)|pin, 0); + } + return 1; +} + +static void +loadtbls(char *name, int all) +{ + int i; + Tbl *t; + + for(i = 0; i < ntblmap; i++){ + t = tblmap[i]; + if(memcmp(t->sig, name, 4) == 0){ + amlload(t->data, tbldlen(t)); + if(!all) + break; + } + } +} + +enum { + Lapicen = 1, +}; + +void +acpiinit(int maxmach) +{ + uchar *p, *e; + int i, c, nmach; + uintmem lapicbase; + Tbl *t; + + print("acpiinit\n"); + maptables(); + amlinit(); + loadtbls("DSDT", 0); + loadtbls("SSDT", 1); + + /* set APIC mode */ + amleval(amlwalk(amlroot, "_PIC"), "i", 1, nil); + if((t = findtable("APIC")) == nil) + panic("acpiinit: no APIC table"); + + p = t->data; + e = p + tbldlen(t); + lapicbase = get32(p); + p += 8; + + nmach = 0; + for(; p < e; p += c){ + c = p[1]; + if(c < 2 || (p+c) > e) + break; + switch(*p){ + case 0x00: /* Processor Local APIC */ + if(p[4] & Lapicen && conf.nmach < maxmach){ + lapicinit(p[3], lapicbase, nmach==0); + conf.nmach = ++nmach; + } + break; + case 0x01: /* I/O APIC */ + ioapicinit(p[2], get32(p+8), get32(p+4)); + break; + case 0x02: /* Interrupt Source Override */ + addirq(get32(p+4), BusISA, 0, p[3], get16(p+8)); + break; + case 0x03: /* NMI Source */ + print("acpi: ignoring nmi source\n"); + break; + case 0x04: /* Local APIC NMI */ + DBG("acpi: lapic nmi %.2ux flags %.4ux lint# %d (ignored)\n", + p[2], (uint)get16(p+3), p[5]); + break; + case 0x05: /* Local APIC Address Override */ + case 0x06: /* I/O SAPIC */ + case 0x07: /* Local SAPIC */ + case 0x08: /* Platform Interrupt Sources */ + case 0x09: /* Processor Local x2APIC */ + case 0x0A: /* x2APIC NMI */ + case 0x0B: /* GIC */ + case 0x0C: /* GICD */ + print("acpi: ignoring entry: %.2ux\n", *p); + break; + } + } + + /* look for PCI interrupt mappings */ + amlenum(amlroot, "_PRT", enumprt, nil); + + /* add identity mapped legacy isa interrupts */ + for(i=0; i<16; i++) + addirq(i, BusISA, 0, i, 0); + + /* free the AML interpreter */ + 
amlexit(); + + print("acpiinit: %d maches\n", nmach); +} --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 15:55:14 2013 @@ -0,0 +1,12 @@ +/* + * The most fundamental constant. + * The code will not compile with RBUFSIZE made a variable; + * for one thing, RBUFSIZE determines FEPERBUF, which determines + * the number of elements in a free-list-block array. + */ +enum{ + RBUFSIZE = 8*1024, /* raw buffer size */ +}; + +#include "../amd64/archdat.h" +#include "../port/portdat.h" --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 15:55:14 2013 @@ -0,0 +1,31 @@ +#include "all.h" + +#define NO 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +void +noream(Device*, int) +{ +} + +Devtab devtab[] = { +[Devnone] 'n', 0, NO + +[Devmcat] '(', ')', mcatread, mcatwrite, mcatsize, 0, 0, mwormream, 0, mcatinit, 0, 0, +[Devmlev] '[', ']', mlevread, mlevwrite, mlevsize, 0, 0, mwormream, 0, mlevinit, 0, 0, +[Devmirr] '{', '}', mirrread, mirrwrite, mirrsize, 0, 0, mwormream, 0, mirrinit, 0, 0, + +[Devcw] 'c', 0, cwread, cwwrite, cwsize, cwsaddr, cwraddr, cwream, cwrecover, cwinit, 0, 0, +[Devro] 'o', 0, roread, rowrite, cwsize, cwsaddr, cwraddr, 0, 0, roinit, 0, 0, +[Devia] 'a', 0, iaread, iawrite, iasize, 0, 0, noream, 0, iainit, 0, 0, +[Devaoe] 'e', 0, aoeread, aoewrite, aoesize, 0, 0, noream, 0, aoeinit, 0, 0, +[Devfworm] 'f', 0, NO //fwormread, fwormwrite, fwormsize, 0, 0, fwormream, 0, fworminit, 0, 0, +[Devide] 'h', 0, NO //ideread, idewrite, idesize, 0, 0, noream, 0, ideinit, idesecsize, 0, +[Devjuke] 'j', 0, NO //jukeread, jukewrite, jukesize, 0, 0, noream, 0, jukeinit, 0, 0, +[Devlworm] 'l', 0, NO //wormread, wormwrite, wormsize, 0,0, noream, 0, jukeinit, 0, 0, +[Devmv] 'm', 0, NO //mvread, mvwrite, mvsize, 0, 0, noream, 0, mvinit, 0, 0, +[Devpart] 'p', 0, partread, partwrite, partsize, 0, 0, noream, 0, partinit, 0, 0, +[Devworm] 'r', 0, NO //wormread, wormwrite, wormsize, 0,0, noream, 0, jukeinit, 0, 0, +[Devwren] 'w', 0, NO //wrenread, 
wrenwrite, wrensize, 0, 0, noream, 0, wreninit, 0, 0, +[Devswab] 'x', 0, NO //swabread, swabwrite, swabsize, swabsuper, swabraddr, swabream, swabrecover, swabinit, 0, 0, +[Devfloppy] 'y', 0, NO //flread, flwrite, flsize, 0, 0, noream, 0, floppyinit, 0, 0, +}; --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 15:55:14 2013 @@ -0,0 +1,38 @@ +#include "all.h" +#include "io.h" + +#include "../ip/ip.h" +#include "etherif.h" + +extern int etherga620reset(Ether*); +extern int ether21140reset(Ether*); +extern int etherelnk3reset(Ether*); +extern int etheri82557reset(Ether*); +extern int igbepnp(Ether *); +extern int dp83815reset(Ether*); +extern int dp83820pnp(Ether*); +extern int rtl8139pnp(Ether*); +extern int rtl8169pnp(Ether*); +extern int i82563reset(Ether*); +extern int i82598pnp(Ether*); +extern int m10gpnp(Ether*); + +Etherctlr etherctlr[] = { +// { "21140", ether21140reset, }, +// { "2114x", ether21140reset, }, +// { "3C509", etherelnk3reset, }, +// { "83815", dp83815reset, }, +// { "dp83820", dp83820pnp, }, +// { "elnk3", etherelnk3reset, }, +// { "ga620", etherga620reset, }, +// { "i82557", etheri82557reset, }, +// { "igbe", igbepnp, }, +// { "i82543", igbepnp, }, +// { "rtl8139", rtl8139pnp, }, + { "rtl8169", rtl8169pnp, }, + { "i82563", i82563reset }, + { "i82568", i82598pnp }, +// { "m10g", m10gpnp }, +}; + +int netherctlr = nelem(etherctlr); --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 15:55:14 2013 @@ -0,0 +1,108 @@ +#include "all.h" +#include "io.h" + +#include "dosfs.h" + +Timet mktime = DATE; +Startsb startsb[] ={ + "main", 2, + 0 +}; + +Dos dos; +extern void cmd_part(int, char**); +static Device *bootdev; + +/* goo because dos takes an int, not a Device* */ +Off +bootread(int, void *buf, long n, Devsize off) +{ + return byteio(bootdev, 0, off, n, buf); +} + +Off +bootwrite(int, void *buf, long n, Devsize off) +{ + return byteio(bootdev, 1, off, n, buf); +} + +Sys *sys = nil; + +void 
+otherinit(void) +{ + char buf[NAMELEN], *p, *v[2]; + Mpl s; + + etherinit(); +// apcinit(); + + s = spllo(); + if(!(p = getconf("nvr"))) + panic("no nvr"); + strncpy(buf, p, sizeof buf-2); + buf[sizeof buf-1] = 0; + if(getfields(buf, v, nelem(v), 0, "!") != 2) + panic("malformed nvr: %s\n", buf); + strcpy(nvrfile, v[1]); + if(!(bootdev = devstr(v[0]))) + panic("bad bootdev: %s", v[0]); + devinit(bootdev); + + print("%Z ! %s\n", bootdev, nvrfile); + + dos.dev = 0; + dos.read = bootread; + dos.write = bootwrite; + if(dosinit(&dos) < 0) + panic("can't init dos dosfs on %s\n", p); + splx(s); +} + + +void +touser(void) +{ + int i; + + /* + * mutiprocessing. ta-da! + */ + sys->epoch = rdtsc(); + active.thunderbirdsarego = 1; + + settime(rtctime()); + boottime = time(); + + print("sysinit\n"); + sysinit(); + + userinit(raheadsort, 0, "rahs"); + for(i=0; itext = "scp"; + synccopy(); +} + +void +localconfinit(void) +{ + conf.nodump = 0; + conf.ripoff = 1; + conf.nlgmsg = 2*1100; /* 8576 bytes, for packets */ + conf.nsmmsg = 2*500; /* 128 bytes */ + conf.nserve = 20; + conf.fastworm = 1; + conf.uartonly = 115200; +} --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 15:55:14 2013 @@ -0,0 +1,165 @@ +CONF=ivey +ARCH=amd64 +ZONE=/adm/timezone/US_Eastern +p=9 + +objtype=amd64 + $target + +install:V: /$objtype/$TARG + +iso cd: bootflp + { echo bootflp ; echo $TARG ; echo plan9.ini } > $TARG'proto' + rm -f $TARG.iso && disk/mk9660 -b bootflp -p $TARG'proto' $TARG.iso + +sizes: sizes.$O + $LD -o $target sizes.$O -lc + +$TARG.$O: ../$ARCH/dosfs.h + +%.$O: %.c + $CC $CFLAGS $stem.c + +%.$O: %.s + $AS $stem.s + +%.$O: $HFILES + +clean:V: + rm -f *.[$OS] [$OS].out bootflp *.iso timezone.h sizes $TARG^proto $TARG $ARCHCLEAN --- /sys/src/fs/ivey Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey Tue Aug 27 16:09:20 2013 @@ -0,0 +1,6 @@ +features +- per-mach scheduler + +bugs +- ether82563 works very poorly with the current etheri +- serial console on reboot flakey 
--- /sys/src/fs/ivey/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/mkfile Tue Aug 27 16:11:08 2013 @@ -0,0 +1,165 @@ +CONF=ivey +ARCH=amd64 +ZONE=/adm/timezone/US_Eastern +p=9 + +objtype=amd64 + $target + +install:V: /$objtype/$TARG + +iso cd: bootflp + { echo bootflp ; echo $TARG ; echo plan9.ini } > $TARG'proto' + rm -f $TARG.iso && disk/mk9660 -b bootflp -p $TARG'proto' $TARG.iso + +sizes: sizes.$O + $LD -o $target sizes.$O -lc + +$TARG.$O: ../$ARCH/dosfs.h + +%.$O: %.c + $CC $CFLAGS $stem.c + +%.$O: %.s + $AS $stem.s + +%.$O: $HFILES + +clean:V: + rm -f *.[$OS] [$OS].out bootflp *.iso timezone.h sizes $TARG^proto $TARG $ARCHCLEAN --- /sys/src/fs/ivey/ivey.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/ivey.c Tue Aug 27 16:11:09 2013 @@ -0,0 +1,108 @@ +#include "all.h" +#include "io.h" + +#include "dosfs.h" + +Timet mktime = DATE; +Startsb startsb[] ={ + "main", 2, + 0 +}; + +Dos dos; +extern void cmd_part(int, char**); +static Device *bootdev; + +/* goo because dos takes an int, not a Device* */ +Off +bootread(int, void *buf, long n, Devsize off) +{ + return byteio(bootdev, 0, off, n, buf); +} + +Off +bootwrite(int, void *buf, long n, Devsize off) +{ + return byteio(bootdev, 1, off, n, buf); +} + +Sys *sys = nil; + +void +otherinit(void) /* start ethernet, split the "nvr" config value into device!file, then init the dos fs on that device */ +{ + char buf[NAMELEN], *p, *v[2]; + Mpl s; + + etherinit(); +// apcinit(); + + s = spllo(); + if(!(p = getconf("nvr"))) + panic("no nvr"); + strncpy(buf, p, sizeof buf-1); /* fill up to the terminator slot; a count of sizeof buf-2 left buf[sizeof buf-2] unset for long values */ + buf[sizeof buf-1] = 0; + if(getfields(buf, v, nelem(v), 0, "!") != 2) + panic("malformed nvr: %s\n", buf); + strcpy(nvrfile, v[1]); + if(!(bootdev = devstr(v[0]))) + panic("bad bootdev: %s", v[0]); + devinit(bootdev); + + print("%Z ! %s\n", bootdev, nvrfile); + + dos.dev = 0; + dos.read = bootread; + dos.write = bootwrite; + if(dosinit(&dos) < 0) + panic("can't init dos dosfs on %s\n", p); + splx(s); +} + + +void +touser(void) +{ + int i; + + /* + * multiprocessing. ta-da!
+ */ + sys->epoch = rdtsc(); + active.thunderbirdsarego = 1; + + settime(rtctime()); + boottime = time(); + + print("sysinit\n"); + sysinit(); + + userinit(raheadsort, 0, "rahs"); + for(i=0; itext = "scp"; + synccopy(); +} + +void +localconfinit(void) +{ + conf.nodump = 0; + conf.ripoff = 1; + conf.nlgmsg = 2*1100; /* 8576 bytes, for packets */ + conf.nsmmsg = 2*500; /* 128 bytes */ + conf.nserve = 20; + conf.fastworm = 1; + conf.uartonly = 115200; +} --- /sys/src/fs/ivey/dat.h Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/dat.h Tue Aug 27 16:11:09 2013 @@ -0,0 +1,12 @@ +/* + * The most fundamental constant. + * The code will not compile with RBUFSIZE made a variable; + * for one thing, RBUFSIZE determines FEPERBUF, which determines + * the number of elements in a free-list-block array. + */ +enum{ + RBUFSIZE = 8*1024, /* raw buffer size */ +}; + +#include "../amd64/archdat.h" +#include "../port/portdat.h" --- /sys/src/fs/ivey/devtab.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/devtab.c Tue Aug 27 16:11:09 2013 @@ -0,0 +1,31 @@ +#include "all.h" + +#define NO 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +void +noream(Device*, int) +{ +} + +Devtab devtab[] = { +[Devnone] 'n', 0, NO + +[Devmcat] '(', ')', mcatread, mcatwrite, mcatsize, 0, 0, mwormream, 0, mcatinit, 0, 0, +[Devmlev] '[', ']', mlevread, mlevwrite, mlevsize, 0, 0, mwormream, 0, mlevinit, 0, 0, +[Devmirr] '{', '}', mirrread, mirrwrite, mirrsize, 0, 0, mwormream, 0, mirrinit, 0, 0, + +[Devcw] 'c', 0, cwread, cwwrite, cwsize, cwsaddr, cwraddr, cwream, cwrecover, cwinit, 0, 0, +[Devro] 'o', 0, roread, rowrite, cwsize, cwsaddr, cwraddr, 0, 0, roinit, 0, 0, +[Devia] 'a', 0, iaread, iawrite, iasize, 0, 0, noream, 0, iainit, 0, 0, +[Devaoe] 'e', 0, aoeread, aoewrite, aoesize, 0, 0, noream, 0, aoeinit, 0, 0, +[Devfworm] 'f', 0, NO //fwormread, fwormwrite, fwormsize, 0, 0, fwormream, 0, fworminit, 0, 0, +[Devide] 'h', 0, NO //ideread, idewrite, idesize, 0, 0, noream, 0, ideinit, idesecsize, 0, +[Devjuke] 'j', 0, NO 
//jukeread, jukewrite, jukesize, 0, 0, noream, 0, jukeinit, 0, 0, +[Devlworm] 'l', 0, NO //wormread, wormwrite, wormsize, 0,0, noream, 0, jukeinit, 0, 0, +[Devmv] 'm', 0, NO //mvread, mvwrite, mvsize, 0, 0, noream, 0, mvinit, 0, 0, +[Devpart] 'p', 0, partread, partwrite, partsize, 0, 0, noream, 0, partinit, 0, 0, +[Devworm] 'r', 0, NO //wormread, wormwrite, wormsize, 0,0, noream, 0, jukeinit, 0, 0, +[Devwren] 'w', 0, NO //wrenread, wrenwrite, wrensize, 0, 0, noream, 0, wreninit, 0, 0, +[Devswab] 'x', 0, NO //swabread, swabwrite, swabsize, swabsuper, swabraddr, swabream, swabrecover, swabinit, 0, 0, +[Devfloppy] 'y', 0, NO //flread, flwrite, flsize, 0, 0, noream, 0, floppyinit, 0, 0, +}; --- /sys/src/fs/ivey/etherctlr.c Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/etherctlr.c Tue Aug 27 16:11:10 2013 @@ -0,0 +1,38 @@ +#include "all.h" +#include "io.h" + +#include "../ip/ip.h" +#include "etherif.h" + +extern int etherga620reset(Ether*); +extern int ether21140reset(Ether*); +extern int etherelnk3reset(Ether*); +extern int etheri82557reset(Ether*); +extern int igbepnp(Ether *); +extern int dp83815reset(Ether*); +extern int dp83820pnp(Ether*); +extern int rtl8139pnp(Ether*); +extern int rtl8169pnp(Ether*); +extern int i82563reset(Ether*); +extern int i82598pnp(Ether*); +extern int m10gpnp(Ether*); + +Etherctlr etherctlr[] = { +// { "21140", ether21140reset, }, +// { "2114x", ether21140reset, }, +// { "3C509", etherelnk3reset, }, +// { "83815", dp83815reset, }, +// { "dp83820", dp83820pnp, }, +// { "elnk3", etherelnk3reset, }, +// { "ga620", etherga620reset, }, +// { "i82557", etheri82557reset, }, +// { "igbe", igbepnp, }, +// { "i82543", igbepnp, }, +// { "rtl8139", rtl8139pnp, }, + { "rtl8169", rtl8169pnp, }, + { "i82563", i82563reset }, + { "i82568", i82598pnp }, +// { "m10g", m10gpnp }, +}; + +int netherctlr = nelem(etherctlr); --- /sys/src/fs/ivey/todo Thu Jan 1 00:00:00 1970 +++ /sys/src/fs/ivey/todo Tue Aug 27 16:11:10 2013 @@ -0,0 +1,6 @@ +features +- per-mach 
scheduler + +bugs +- ether82563 works very poorly with the current etheri +- serial console on reboot flakey --- /sys/src/fs/mkfile Tue Aug 27 16:11:11 2013 +++ /sys/src/fs/mkfile Tue Aug 27 16:11:11 2013 @@ -3,9 +3,10 @@ buda\ dahlonega\ fairyland\ - fs\ fs64\ + fs\ ila\ + ivey\ wrens\ all:V: