clarify a few magic values, minor performance tuning and add support for jumbo frames. suprisingly, jumbo performance is okay heavy standard frame traffic bogs the chip down. 4k aoe frames are 10x faster than standard aoe frames. this is likely a problem with the driver. Notes: Wed Mar 19 15:07:08 EDT 2008 geoff Please (re)read style(6) and edit your code to conform, then resubmit this patch. Thanks. Reference: /n/sources/patch/sorry/ether8169jumbo Date: Fri Feb 29 14:06:08 CET 2008 Signed-off-by: quanstro@quanstro.net Reviewed-by: geoff --- /sys/src/9/pc/ether8169.c Fri Feb 29 14:02:46 2008 +++ /sys/src/9/pc/ether8169.c Fri Feb 29 14:02:43 2008 @@ -43,7 +43,7 @@ Config3 = 0x54, /* Configuration Register 3 */ Config4 = 0x55, /* Configuration Register 4 */ Config5 = 0x56, /* Configuration Register 5 */ - Timerint = 0x58, /* Timer Interrupt */ + Timerint = 0x58, /* Timer Interrupt */ Mulint = 0x5C, /* Multiple Interrupt Select */ Phyar = 0x60, /* PHY Access */ Tbicsr0 = 0x64, /* TBI Control and Status */ @@ -53,8 +53,9 @@ Rms = 0xDA, /* Receive Packet Maximum Size */ Cplusc = 0xE0, /* C+ Command */ + Coal = 0xE2, /* Interrupt Mitigation (Coalesce) */ Rdsar = 0xE4, /* Receive Descriptor Start Address */ - Mtps = 0xEC, /* Max. Transmit Packet Size */ + Etx = 0xEC, /* Early Transmit Threshold */ }; enum { /* Dtccr */ @@ -180,12 +181,11 @@ Tcps = 0x00010000, /* TCP Checksum Offload */ Udpcs = 0x00020000, /* UDP Checksum Offload */ Ipcs = 0x00040000, /* IP Checksum Offload */ - Lgsen = 0x08000000, /* Large Send */ + Lgsen = 0x08000000, /* TSO; WARNING: contains lark's vomit */ }; enum { /* Receive Descriptor control */ - RxflMASK = 0x00003FFF, /* Receive Frame Length */ - RxflSHIFT = 0, + RxflMASK = 0x00001FFF, /* Receive Frame Length */ Tcpf = 0x00004000, /* TCP Checksum Failure */ Udpf = 0x00008000, /* UDP Checksum Failure */ Ipf = 0x00010000, /* IP Checksum Failure */ @@ -212,10 +212,13 @@ /* */ enum { /* Ring sizes (<= 1024) */ - Ntd = 32, /* Transmit Ring */ - Nrd = 128, /* Receive Ring */ + Ntd = 64, /* Transmit Ring */ + Nrd = 256, /* Receive Ring */ - Mps = ROUNDUP(ETHERMAXTU+4, 128), + Stdbuf = 1536, + Mtu = 7000, /* performance limited */ + Mps = Mtu+8+14, /* if(mtu>ETHERMAXTU) */ +// Mps = ROUNDUP(ETHERMAXTU+4, 128), }; typedef struct Dtcc Dtcc; @@ -273,7 +276,7 @@ int ntdfree; int ntq; - int mtps; /* Max. Transmit Packet Size */ +// int rbsz; /* receive buffer size */ Lock rlock; /* receive */ D* rd; /* descriptor ring */ @@ -301,6 +304,7 @@ uint punlc; uint fovw; uint mcast; + uint frag; /* partial packets; rb was too small */ } Ctlr; static Ctlr* rtl8169ctlrhead; @@ -484,10 +488,10 @@ static long rtl8169ifstat(Ether* edev, void* a, long n, ulong offset) { - char *p; + char *p0, *p, *e; Ctlr *ctlr; Dtcc *dtcc; - int i, l, r, timeo; + int i, r, timeo; ctlr = edev->ctlr; qlock(&ctlr->slock); @@ -522,54 +526,56 @@ return 0; } - if((p = malloc(READSTR)) == nil) + if((p = p0 = malloc(READSTR)) == nil) error(Enomem); + e = p+READSTR; - l = snprint(p, READSTR, "TxOk: %llud\n", dtcc->txok); - l += snprint(p+l, READSTR-l, "RxOk: %llud\n", dtcc->rxok); - l += snprint(p+l, READSTR-l, "TxEr: %llud\n", dtcc->txer); - l += snprint(p+l, READSTR-l, "RxEr: %ud\n", dtcc->rxer); - l += snprint(p+l, READSTR-l, "MissPkt: %ud\n", dtcc->misspkt); - l += snprint(p+l, READSTR-l, "FAE: %ud\n", dtcc->fae); - l += snprint(p+l, READSTR-l, "Tx1Col: %ud\n", dtcc->tx1col); - l += snprint(p+l, READSTR-l, "TxMCol: %ud\n", dtcc->txmcol); - l += snprint(p+l, READSTR-l, "RxOkPh: %llud\n", dtcc->rxokph); - l += snprint(p+l, READSTR-l, "RxOkBrd: %llud\n", dtcc->rxokbrd); - l += snprint(p+l, READSTR-l, "RxOkMu: %ud\n", dtcc->rxokmu); - l += snprint(p+l, READSTR-l, "TxAbt: %ud\n", dtcc->txabt); - l += snprint(p+l, READSTR-l, "TxUndrn: %ud\n", dtcc->txundrn); - - l += snprint(p+l, READSTR-l, "txdu: %ud\n", ctlr->txdu); - l += snprint(p+l, READSTR-l, "tcpf: %ud\n", ctlr->tcpf); - l += snprint(p+l, READSTR-l, "udpf: %ud\n", ctlr->udpf); - l += snprint(p+l, READSTR-l, "ipf: %ud\n", ctlr->ipf); - l += snprint(p+l, READSTR-l, "fovf: %ud\n", ctlr->fovf); - l += snprint(p+l, READSTR-l, "ierrs: %ud\n", ctlr->ierrs); - l += snprint(p+l, READSTR-l, "rer: %ud\n", ctlr->rer); - l += snprint(p+l, READSTR-l, "rdu: %ud\n", ctlr->rdu); - l += snprint(p+l, READSTR-l, "punlc: %ud\n", ctlr->punlc); - l += snprint(p+l, READSTR-l, "fovw: %ud\n", ctlr->fovw); - - l += snprint(p+l, READSTR-l, "tcr: %#8.8ux\n", ctlr->tcr); - l += snprint(p+l, READSTR-l, "rcr: %#8.8ux\n", ctlr->rcr); - l += snprint(p+l, READSTR-l, "multicast: %ud\n", ctlr->mcast); + p = seprint(p, e, "TxOk: %llud\n", dtcc->txok); + p = seprint(p, e, "RxOk: %llud\n", dtcc->rxok); + p = seprint(p, e, "TxEr: %llud\n", dtcc->txer); + p = seprint(p, e, "RxEr: %ud\n", dtcc->rxer); + p = seprint(p, e, "MissPkt: %ud\n", dtcc->misspkt); + p = seprint(p, e, "FAE: %ud\n", dtcc->fae); + p = seprint(p, e, "Tx1Col: %ud\n", dtcc->tx1col); + p = seprint(p, e, "TxMCol: %ud\n", dtcc->txmcol); + p = seprint(p, e, "RxOkPh: %llud\n", dtcc->rxokph); + p = seprint(p, e, "RxOkBrd: %llud\n", dtcc->rxokbrd); + p = seprint(p, e, "RxOkMu: %ud\n", dtcc->rxokmu); + p = seprint(p, e, "TxAbt: %ud\n", dtcc->txabt); + p = seprint(p, e, "TxUndrn: %ud\n", dtcc->txundrn); + + p = seprint(p, e, "txdu: %ud\n", ctlr->txdu); + p = seprint(p, e, "tcpf: %ud\n", ctlr->tcpf); + p = seprint(p, e, "udpf: %ud\n", ctlr->udpf); + p = seprint(p, e, "ipf: %ud\n", ctlr->ipf); + p = seprint(p, e, "fovf: %ud\n", ctlr->fovf); + p = seprint(p, e, "ierrs: %ud\n", ctlr->ierrs); + p = seprint(p, e, "rer: %ud\n", ctlr->rer); + p = seprint(p, e, "rdu: %ud\n", ctlr->rdu); + p = seprint(p, e, "punlc: %ud\n", ctlr->punlc); + p = seprint(p, e, "fovw: %ud\n", ctlr->fovw); + p = seprint(p, e, "frag: %ud\n", ctlr->frag); + + p = seprint(p, e, "tcr: %#8.8ux\n", ctlr->tcr); + p = seprint(p, e, "rcr: %#8.8ux\n", ctlr->rcr); + p = seprint(p, e, "multicast: %ud\n", ctlr->mcast); if(ctlr->mii != nil && ctlr->mii->curphy != nil){ - l += snprint(p+l, READSTR, "phy: "); + p = seprint(p, e, "phy: "); for(i = 0; i < NMiiPhyr; i++){ if(i && ((i & 0x07) == 0)) - l += snprint(p+l, READSTR-l, "\n "); + p = seprint(p, e, "\n "); r = miimir(ctlr->mii, i); - l += snprint(p+l, READSTR-l, " %4.4ux", r); + p = seprint(p, e, " %4.4ux", r); } - snprint(p+l, READSTR-l, "\n"); + seprint(p, e, "\n"); } - n = readstr(offset, a, n, p); + n = readstr(offset, a, n, p0); qunlock(&ctlr->slock); poperror(); - free(p); + free(p0); return n; } @@ -628,8 +634,9 @@ ctlr->rb[rdt] = bp; d->addrlo = PCIWADDR(bp->rp); d->addrhi = 0; - } - coherence(); + coherence(); + }else + iprint("i8169: rx overrun\n"); d->control |= Own|Mps; rdt = NEXT(rdt, ctlr->nrd); ctlr->nrdfree++; @@ -652,14 +659,10 @@ rtl8169halt(ctlr); /* - * MAC Address. + * MAC Address is not settable on some (all?) chips. * Must put chip into config register write enable mode. */ csr8w(ctlr, Cr9346, Eem1|Eem0); - r = (edev->ea[3]<<24)|(edev->ea[2]<<16)|(edev->ea[1]<<8)|edev->ea[0]; - csr32w(ctlr, Idr0, r); - r = (edev->ea[5]<<8)|edev->ea[4]; - csr32w(ctlr, Idr0+4, r); /* * Transmitter. @@ -676,12 +679,11 @@ ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0; ctlr->rd[ctlr->nrd-1].control = Eor; - for(i = 0; i < ctlr->nrd; i++){ + for(i = 0; i < ctlr->nrd; i++) if((bp = ctlr->rb[i]) != nil){ ctlr->rb[i] = nil; freeb(bp); } - } rtl8169replenish(ctlr); ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm; @@ -692,18 +694,16 @@ * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst * settings in Tcr/Rcr; the (1<<14) is magic. */ - ctlr->mtps = HOWMANY(Mps, 128); cplusc = csr16r(ctlr, Cplusc) & ~(1<<14); - cplusc |= /*Rxchksum|*/Mulrw; + cplusc |= Rxchksum|Mulrw; switch(ctlr->macv){ default: return -1; case Macv01: - ctlr->mtps = HOWMANY(Mps, 32); break; case Macv02: case Macv03: - cplusc |= (1<<14); /* magic */ + cplusc |= 1<<14; /* magic */ break; case Macv05: /* @@ -765,43 +765,43 @@ /* * Clear missed-packet counter; - * initial early transmit threshold value; + * clear early transmit threshold value; * set the descriptor ring base addresses; * set the maximum receive packet size; * no early-receive interrupts. + * + * note: the maximum rx size is a filter. the size of the buffer + * in the descriptor ring is still honored. we will toss >Mtu + * packets because they've been fragmented into mutiple + * rx buffers. */ csr32w(ctlr, Mpc, 0); - csr8w(ctlr, Mtps, ctlr->mtps); + csr8w(ctlr, Etx, 0x3f); csr32w(ctlr, Tnpds+4, 0); csr32w(ctlr, Tnpds, PCIWADDR(ctlr->td)); csr32w(ctlr, Rdsar+4, 0); csr32w(ctlr, Rdsar, PCIWADDR(ctlr->rd)); - csr16w(ctlr, Rms, Mps); - r = csr16r(ctlr, Mulint) & 0xF000; + csr16w(ctlr, Rms, 16383); /* was Mps */ + r = csr16r(ctlr, Mulint) & 0xF000; /* no early rx interrupts */ csr16w(ctlr, Mulint, r); csr16w(ctlr, Cplusc, cplusc); + csr16w(ctlr, Coal, 0); /* * Set configuration. */ switch(ctlr->pciv){ - default: - break; case Rtl8169sc: - csr16w(ctlr, 0xE2, 0); /* magic */ csr8w(ctlr, Cr, Te|Re); csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); csr32w(ctlr, Rcr, ctlr->rcr); break; case Rtl8168b: case Rtl8169c: - csr16w(ctlr, 0xE2, 0); /* magic */ csr16w(ctlr, Cplusc, 0x2000); /* magic */ csr8w(ctlr, Cr, Te|Re); csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); csr32w(ctlr, Rcr, ctlr->rcr); - csr16w(ctlr, Rms, 0x0800); - csr8w(ctlr, Mtps, 0x3F); break; } ctlr->tcr = csr32r(ctlr, Tcr); @@ -925,7 +925,7 @@ d->addrhi = 0; ctlr->tb[x] = bp; coherence(); - d->control |= Own|Fs|Ls|((BLEN(bp)<control |= Own|Fs|Ls|BLEN(bp); x = NEXT(x, ctlr->ntd); ctlr->ntq++; @@ -961,9 +961,8 @@ control = d->control; if((control & (Fs|Ls|Res)) == (Fs|Ls)){ bp = ctlr->rb[rdh]; - ctlr->rb[rdh] = nil; - bp->wp = bp->rp + ((control & RxflMASK)>>RxflSHIFT)-4; - bp->next = nil; + bp->wp = bp->rp + (control & RxflMASK)-4; +// bp->next = nil; if(control & Fovf) ctlr->fovf++; @@ -996,13 +995,13 @@ break; } etheriq(edev, bp, 1); + }else{ + if(!(control&Res)) + ctlr->frag++; + /* iprint("i8169: control %#.8ux\n", control); */ + freeb(ctlr->rb[rdh]); } - else{ - /* - * Error stuff here. - print("control %#8.8ux\n", control); - */ - } + ctlr->rb[rdh] = nil; d->control &= Eor; ctlr->nrdfree--; rdh = NEXT(rdh, ctlr->nrd); @@ -1167,6 +1166,7 @@ edev->irq = ctlr->pcidev->intl; edev->tbdf = ctlr->pcidev->tbdf; edev->mbps = 100; + edev->maxmtu = Mtu; /* * Check if the adapter's station address is to be overridden.