rework networking - reorganize etherif, and allow for some limited automatic discovery. - protocols now have a signature like so: aoereceive(Ifc *ifc, Msgbuf *mb) which avoids extra copies. - reorganize arp/ip. rework frag algorithm. Reference: /n/atom/patch/applied/fsnetrework Date: Mon Apr 14 07:30:19 CES 2014 Signed-off-by: quanstro@quanstro.net --- /sys/src/fs/ip/arp.c Mon Apr 14 07:22:30 2014 +++ /sys/src/fs/ip/arp.c Mon Apr 14 07:22:31 2014 @@ -18,8 +18,7 @@ static int ipahash(uchar*); static void cmd_arp(int, char*[]); -static -struct +static struct { Lock; uchar null[Pasize]; @@ -68,6 +67,8 @@ if(arpcache.start == 0) { lock(&arpcache); if(arpcache.start == 0) { + arpcache.idgen = toytime() * 80021+1; + cmd_install("arp", "subcommand -- arp protocol", cmd_arp); arpcache.flag = flag_install("arp", "-- verbose"); arpcache.start = 1; @@ -77,27 +78,59 @@ } } -void -arpreceive(Enpkt *ep, int l, Ifc *ifc) +static void +arpresolve(Arpe *a) { - Ilp* ilp; - Arppkt *p, *q; - Msgbuf *mb, **mbp; - Arpe *a; uchar *tpa; - int type, i, h; + Ilp* ilp; + Msgbuf *mb, **mm; Timet t; - if(l < Ensize+Arpsize) + t = toytime(); +restart: + for(mm = &arpcache.unresol; (mb = *mm) != nil; ){ + if(t >= mb->param){ + *mm = mb->next; + mb->next = nil; + mbfree(mb); + continue; + } + ilp = mb->chan->pdata; + tpa = ilp->ipgate; + if(memcmp(a->tpa, tpa, Pasize) == 0) { + *mm = mb->next; + mb->next = nil; + + unlock(&arpcache); + ipsend(mb); + lock(&arpcache); + goto restart; + } + mm = &mb->next; + } +} + +void +arpreceive(Ifc *ifc, Msgbuf *mb) +{ + int type, i, h; + Arppkt *p; + Arpe *a; + + if(mb->count < Ensize+Arpsize){ + mbfree(mb); return; + } - p = (Arppkt*)ep; + p = (Arppkt*)mb->data; if(nhgets(p->pro) != Iptype || nhgets(p->hrd) != 1 || p->pln != Pasize || - p->hln != Easize) + p->hln != Easize){ + mbfree(mb); return; + } type = nhgets(p->op); switch(type) { @@ -114,22 +147,22 @@ } unlock(&arpcache); - if(memcmp(p->tpa, ifc->ipa, 
Pasize) != 0){ + mbfree(mb); + return; + } DEBUG("rcv arp req for %I from %I\n", p->tpa, p->spa); - mb = mballoc(Ensize+Arpsize, 0, Mbarp1); - q = (Arppkt*)mb->data; + hnputs(p->op, Arpreply); + memmove(p->d, p->s, Easize); + memmove(p->s, ifc->ea, Easize); - memmove(q, p, Ensize+Arpsize); + memmove(p->tha, p->sha, Easize); + memmove(p->sha, ifc->ea, Easize); - hnputs(q->op, Arpreply); - memmove(q->tha, p->sha, Easize); - memmove(q->tpa, p->spa, Pasize); - memmove(q->sha, ifc->ea, Easize); - memmove(q->spa, ifc->ipa, Pasize); - memmove(q->d, q->s, Easize); + memmove(p->tpa, p->spa, Pasize); + memmove(p->spa, ifc->ipa, Pasize); send(ifc->reply, mb); break; @@ -143,52 +176,31 @@ for(i=0; itpa, p->spa, Pasize) == 0) { memmove(a->tha, p->sha, Easize); - goto out; + break; } } - i = arpcache.abkt[h].laste + 1; - if(i < 0 || i >= Ne) - i = 0; - arpcache.abkt[h].laste = i; - - a = &arpcache.abkt[h].arpe[i]; - memmove(a->tpa, p->spa, Pasize); - memmove(a->tha, p->sha, Easize); + if(i == Ne){ + i = arpcache.abkt[h].laste + 1; + if(i < 0 || i >= Ne) + i = 0; + arpcache.abkt[h].laste = i; + + a = &arpcache.abkt[h].arpe[i]; + memmove(a->tpa, p->spa, Pasize); + memmove(a->tha, p->sha, Easize); + } /* * go thru unresolved queue */ - out: - t = toytime(); - mbp = &arpcache.unresol; - for(mb = *mbp; mb; mb = *mbp) { - if(t >= mb->param) { - *mbp = mb->next; - unlock(&arpcache); - mbfree(mb); - lock(&arpcache); - goto out; - } - ilp = mb->chan->pdata; - tpa = ilp->ipgate; - if(memcmp(a->tpa, tpa, Pasize) == 0) { - *mbp = mb->next; - mb->next = 0; - unlock(&arpcache); - ipsend(mb); - lock(&arpcache); - goto out; - } - mbp = &mb->next; - } + arpresolve(a); unlock(&arpcache); break; } } -static -int +static int ipahash(uchar *p) { ulong h; @@ -215,8 +227,11 @@ a = arpcache.abkt[ipahash(ipgate)].arpe; lock(&arpcache); for(i=0; itpa, ipgate, Pasize) == 0) + if(memcmp(a->tpa, ipgate, Pasize) == 0){ + id = arpcache.idgen++; + unlock(&arpcache); goto found; + } /* * queue ip pkt to be 
resolved later @@ -274,14 +289,9 @@ found: len = mb->count; /* includes Ensize+Ipsize+Ilsize */ memmove(p->d, a->tha, Easize); - p->vihl = IP_VER|IP_HLEN; + p->vihl = IP_VER4|IP_HLEN4; p->tos = 0; p->ttl = 255; - id = arpcache.idgen; - if(id == 0) - id = toytime() * 80021; - arpcache.idgen = id+1; - unlock(&arpcache); hnputs(p->id, id); hnputs(p->type, Iptype); @@ -354,8 +364,8 @@ Chan *cp; cp = mb->chan; - if(cp == 0) { - print("cp = 0\n"); + if(cp == nil) { + print("cp = nil\n"); mbfree(mb); return; } --- /sys/src/fs/ip/icmp.c Mon Apr 14 07:22:32 2014 +++ /sys/src/fs/ip/icmp.c Mon Apr 14 07:22:32 2014 @@ -25,7 +25,8 @@ switch(p->icmptype) { default: - goto drop; + mbfree(mb); + return; case EchoRequest: memmove(tmp, p->src, Pasize); @@ -42,12 +43,8 @@ if((nhgetl(ifc->ipa)&ifc->mask) != (nhgetl(p->dst)&ifc->mask)) iproute(tmp, p->dst, ifc->netgate); ipsend1(mb, ifc, tmp); - break; + return; } - return; - -drop: - mbfree(mb); } void --- /sys/src/fs/ip/il.c Mon Apr 14 07:22:34 2014 +++ /sys/src/fs/ip/il.c Mon Apr 14 07:22:36 2014 @@ -146,8 +146,8 @@ dstp = nhgets(p->ildst); lock(&il); - xcp = 0; - for(cp = il.chan; cp; cp = ilp->chan) { + xcp = nil; + for(cp = il.chan; cp != nil; cp = ilp->chan) { ilp = cp->pdata; if(ilp->alloc == 0) { xcp = cp; @@ -162,7 +162,7 @@ } } - if(il.reply == 0) { + if(il.reply == nil) { il.reply = newqueue(Nqueue); userinit(ilout, &il, "ilo"); userinit(iltimer, &il, "ilt"); @@ -185,14 +185,13 @@ } cp = xcp; - if(cp == 0) { + if(cp == nil) { cp = chaninit(Devil, 1, sizeof(Ilp)); ilp = cp->pdata; ilp->chan = il.chan; il.chan = cp; } - cp->ifc = ifc; ilp = cp->pdata; memmove(ilp->iphis, p->src, Pasize); @@ -213,7 +212,7 @@ cp->protocol = nil; cp->msize = 0; cp->whotime = 0; - sprint(cp->whochan, "il!%I!%d", p->src, srcp); + snprint(cp->whochan, sizeof cp->whochan, "il!%I!%d", p->src, srcp); cp->whoprint = ilwhoprint; ilp->alloc = 1; --- /sys/src/fs/ip/ip.c Mon Apr 14 07:22:37 2014 +++ /sys/src/fs/ip/ip.c Mon Apr 14 07:22:38 2014 @@ -1,8 +1,7 
@@ #include "all.h" - #include "../ip/ip.h" -#define DEBUG if(cons.flags&Fip)print +#define dprint(...) do{if(cons.flags&ipflag)print(__VA_ARGS__);}while(0) typedef struct Rock Rock; typedef struct Frag Frag; @@ -25,213 +24,268 @@ Frag frag[Nfrag]; }; -static -struct -{ +static int ipflag; +static struct { Lock; Rock rock[Nrock]; } ip; -void -ipreceive(Enpkt *ep, int l, Ifc *ifc) +static int +ip4chk(Ifc *ifc, Msgbuf *mb) { Ippkt *p; - Msgbuf *mb; - Rock *r, *or; - Frag *f; - int len, id, frag, off, loff, i, n; -// Ippkt pkt; - Timet t; - p = (Ippkt*)ep; - if(l < Ensize+Ipsize) { + p = (Ippkt*)mb->data; + if(mb->count < Ensize+Ipsize) { ifc->sumerr++; print("ip: en too small\n"); - return; + return -1; } - if(l > LARGEBUF) { + if(mb->count > LARGEBUF) { ifc->sumerr++; print("ip: en too large\n"); - return; + return -1; } -// memmove(&pkt, p, Ensize+Ipsize); /* copy pkt to 'real' memory */ - if(p->vihl != (IP_VER|IP_HLEN)) - return; + if(p->vihl != (IP_VER4|IP_HLEN4)) + return -1; if(!ipforme(p->dst, ifc)) - return; -// if((m->flags & Bipck) == 0) + return -1; + + if((mb->flags & Bipck) == 0) if(ipcsum(&p->vihl)) { ifc->sumerr++; // print("ip: checksum error (from %I)\n", p->src); - return; + return -1; } + return 0; +} - frag = nhgets(p->frag); - len = nhgets(p->length) - Ipsize; - id = nhgets(p->id); +static void +ip4recv(Ifc *ifc, Msgbuf *mb) +{ + Ippkt *p; - /* - * total ip msg fits into one frag - */ - if((frag & ~IP_DF) == 0) { - mb = mballoc(l, 0, Mbip3); -// memmove(mb->data, &pkt, Ensize+Ipsize); -// memmove(mb->data + (Ensize+Ipsize), -// (uchar*)p + (Ensize+Ipsize), l-(Ensize+Ipsize)); - memmove(mb->data, (uchar*)p, l); - goto send; + p = (Ippkt*)mb->data; + switch(p->proto) { + default: + mbfree(mb); + break; + case Ilproto: + ilrecv(mb, ifc); + break; + case Udpproto: + udprecv(mb, ifc); + break; + case Icmpproto: + icmprecv(mb, ifc); + break; + case Igmpproto: + igmprecv(mb, ifc); + break; + case Tcpproto: + tcprecv(mb, ifc); + break; } +} - /* - * 
throw away old rocks. - */ - t = toytime(); - lock(&ip); - r = ip.rock; - for(i=0; imb && t >= r->age) { - mbfree(r->mb); - r->mb = 0; - } +void +freerock(Rock *r) +{ + if(r->mb){ + mbfree(r->mb); + r->mb = nil; + } +} + +/* + * the best laid plains, etc. + * + * copy out to avoid forcing the drivers to keep LARGEBUF-sized + * buffers in their rings. revisit this decision later. + */ +static Rock* +findrock(Msgbuf **mbp, Timet t) +{ + int i, id; + Ippkt *p; + Msgbuf *mb; + Rock *r, *newr, *oldr; /* * reassembly of fragments * look up rock by src, dst, id. */ - or = 0; - r = ip.rock; - for(i=0; imb == 0) { - if(or == 0) - or = r; + mb = *mbp; + p = (Ippkt*)mb->data; + id = nhgets(p->id); + + newr = nil; + oldr = nil; + for(i=0; imb != nil && t >= r->age) + freerock(r); + /* empty? use if rock not found */ + if(r->mb == nil) { + if(newr == nil) + newr = r; continue; } + /* old? use if rock not found, and no free rocks */ + if(oldr == nil || r->age < oldr->age) + oldr = r; /* oldest rock */ if(id == r->id) if(memcmp(r->src, p->src, Pasize) == 0) if(memcmp(r->dst, p->dst, Pasize) == 0) - goto found; + return r; } - r = or; - if(r == 0) { - /* no available rocks */ - r = ip.rock; - for(i=0; imb) { - mbfree(r->mb); - r->mb = 0; - } - r = ip.rock; + r = newr; + if(r == nil){ + freerock(oldr); + r = oldr; } r->id = id; - r->mb = mballoc(LARGEBUF, 0, Mbip2); +// r->mb = mb; + r->mb = r->mb = mballoc(LARGEBUF, nil, Mbip2); + memmove(r->mb->data, mb->data, Ensize+Ipsize); /* copy in header */ + memmove(r->src, p->src, Pasize); memmove(r->dst, p->dst, Pasize); r->nfrag = 0; r->last = 0; +// *mbp = nil; /* can't reuse */ + return r; +} -found: - mb = r->mb; +static void +fragprint(Frag *f, int idx, int n, char *label) +{ + int i; + + print("%s ", label); + for(i = 0; i < n; i++) + print("[%d %d] ", f[i].start, f[i].end); + print(" (i=%d; n=%d)\n", idx, n); +} + +void +ipreceive(Ifc *ifc, Msgbuf *mb) +{ + int len, frag, off, i, n; + Frag *f; + Ippkt *p; + Rock *r; + Timet t; + + 
if(ip4chk(ifc, mb) == -1){ + mbfree(mb); + return; + } + + p = (Ippkt*)mb->data; + frag = nhgets(p->frag); + len = nhgets(p->length) - Ipsize; + + /* + * total ip msg fits into one frag + */ + if((frag & ~IP_DF) == 0){ + ip4recv(ifc, mb); + return; + } + + t = toytime(); + lock(&ip); + + r = findrock(&mb, t); r->age = t + SECOND(30); off = (frag & ~(IP_DF|IP_MF)) << 3; - if(len+off+Ensize+Ipsize > mb->count) { + if(len+off+Ensize+Ipsize > r->mb->count) { + dprint("ip pkt too big %d > %d\n", len+off+Ensize+Ipsize, r->mb->count); /* ip pkt too big */ + freerock(r); + unlock(&ip); mbfree(mb); - r->mb = 0; - goto uout; + return; } if(!(frag & IP_MF)) r->last = off+len; /* found the end */ - memmove(mb->data+(Ensize+Ipsize)+off, - (uchar*)p + (Ensize+Ipsize), len); + n = r->nfrag; + if(n+1 == Nfrag){ + /* too many frags. count is sloppy, since we merge later */ + freerock(r); + unlock(&ip); + dprint("ip: too many unmerged frags %d\n", Nfrag); + return; + } /* - * frag algorithm: - * first entry is easy + * frag algorithm: merge sort O(n²) in unmerged fragments */ - n = r->nfrag; - if(n == 0) { - r->frag[0].start = off; - r->frag[0].end = off+len; - r->nfrag = 1; - goto span; + if(mb != nil){ + dprint("frag[%d]: [%d %d]\n", n, off, off+len); + memmove(r->mb->data+(Ensize+Ipsize)+off, + (uchar*)p + (Ensize+Ipsize), len); + mbfree(mb); + mb = nil; } /* - * two in a row is easy + * insertion sort */ - if(r->frag[n-1].end == off) { - r->frag[n-1].end += len; - goto span; - } + f = r->frag; + for(i = n; i > 0 && f[i-1].end > off; i--) + f[i] = f[i-1]; + f[i].start = off; + f[i].end = off+len; + n++; /* - * add this frag + * merge previous and next, if possible */ - if(n >= Nfrag) { - /* too many frags */ - mbfree(mb); - r->mb = 0; - goto uout; + if(i > 0 && f[i-1].end == off){ + f[i-1].end = f[i].end; + n--; + memmove(f+i, f+i+1, sizeof(*f)*(n-i)); + // i--; + } + if(i+1 < n && f[i+1].start == off+len){ + f[i].end = f[i+1].end; + n--; + i++; + memmove(f+i, f+i+1, 
sizeof(*f)*(n-i)); } - r->frag[n].start = off; - r->frag[n].end = off+len; - n++; r->nfrag = n; -span: + if(cons.flags&ipflag) + fragprint(f, i, n, "postmerge"); + /* - * see if we span the whole list - * can be O(n**2), but usually much smaller + * if the first frag covers the segment, then we have + * the whole packet merged. */ - if(r->last == 0) - goto uout; - off = 0; + if(f[0].start > 0 || f[0].end != r->last){ + unlock(&ip); + return; + } -spanloop: - loff = off; - f = r->frag; - for(i=0; i= f->start && off < f->end) - off = f->end; - if(loff == off) - goto uout; - if(off < r->last) - goto spanloop; + mb = r->mb; + mb->count = r->last + (Ensize+Ipsize); - memmove(mb->data, p, Ensize+Ipsize); - p = (Ippkt*)mb->data; - hnputs(p->length, r->last+Ipsize); - l = r->last + (Ensize+Ipsize); - mb->count = l; - r->mb = 0; + r->mb = nil; unlock(&ip); -send: - switch(p->proto) { - default: - mbfree(mb); - break; - case Ilproto: - ilrecv(mb, ifc); - break; - case Udpproto: - udprecv(mb, ifc); - break; - case Icmpproto: - icmprecv(mb, ifc); - break; - case Igmpproto: - igmprecv(mb, ifc); - break; - case Tcpproto: - tcprecv(mb, ifc); - break; - } - return; + p = (Ippkt*)mb->data; + hnputs(p->length, r->last+Ipsize); + ip4recv(ifc, mb); +} -uout: - unlock(&ip); +void +ipstart(void) +{ + ipflag = flag_install("ip", "-- debugging"); } --- /sys/src/fs/ip/ip.h Mon Apr 14 07:22:39 2014 +++ /sys/src/fs/ip/ip.h Mon Apr 14 07:22:40 2014 @@ -86,7 +86,7 @@ Udpproto = 17, Ilproto = 40, - Nqueue = 20, + Nqueue = 50, Nfrag = 6, /* max number of non-contig ip fragments */ Nrock = 20, /* number of partial ip assembly stations */ Nb = 211, /* number of arp hash buckets */ @@ -99,8 +99,8 @@ Udpsize = 8, /* il header size -- doesnt include Ipsize/Ensize */ Udpphsize = 12, /* udp pseudo ip header size */ - IP_VER = 0x40, /* Using IP version 4 */ - IP_HLEN = Ipsize/4, /* Header length in longs */ + IP_VER4 = 0x40, /* Using IP version 4 */ + IP_HLEN4 = Ipsize/4, /* Header length in longs */ 
IP_DF = 0x4000, /* Don't fragment */ IP_MF = 0x2000, /* More fragments */ @@ -259,17 +259,18 @@ Ifc* enets; /* List of configured interfaces */ -void riprecv(Msgbuf*, Ifc*); -void sntprecv(Msgbuf *mb, Ifc *ifc); +void arpreceive(Ifc*, Msgbuf*); +void cecreceive(Ifc*, Msgbuf*); +void ipreceive(Ifc*, Msgbuf*); -void arpreceive(Enpkt*, int, Ifc*); -void cecreceive(Enpkt*, int, Ifc*); -void ipreceive(Enpkt*, int, Ifc*); -void ilrecv(Msgbuf*, Ifc*); -void udprecv(Msgbuf*, Ifc*); void icmprecv(Msgbuf*, Ifc*); void igmprecv(Msgbuf*, Ifc*); +void ilrecv(Msgbuf*, Ifc*); +void riprecv(Msgbuf*, Ifc*); +void sntprecv(Msgbuf*, Ifc*); void tcprecv(Msgbuf*, Ifc*); +void udprecv(Msgbuf*, Ifc*); + void iprouteinit(void); long ipclassmask(uchar*); void iproute(uchar*, uchar*, uchar*); @@ -281,6 +282,8 @@ int ptclcsum(uchar*, int); void ipsend(Msgbuf*); void ipsend1(Msgbuf*, Ifc*, uchar*); + +void ipstart(void); uchar sntpip[Pasize]; /* ip address of sntp server */ --- /sys/src/fs/dev/aoe.c Mon Apr 14 07:22:42 2014 +++ /sys/src/fs/dev/aoe.c Mon Apr 14 07:22:44 2014 @@ -1377,22 +1377,22 @@ } void -aoereceive(Enpkt *p, int count, Ifc *ifc) +aoereceive(Ifc *ifc, Msgbuf *mb) { int i; - Msgbuf *mb; Netlink *nl; - if(count < 60) + if(mb->count < 60){ + mbfree(mb); return; - if((i = ifcidx(ifc)) == -1) + } + if((i = ifcidx(ifc)) == -1){ + mbfree(mb); return; + } nl = netlinks.nl+i; if(UP(nl) == 0) return; - /* too stupid for words. 
*/ - mb = mballoc(count, 0, 0); - memmove(mb->data, p, count); send(aoeq[i], mb); } --- /sys/src/fs/amd64/etherif.c Mon Apr 14 07:22:45 2014 +++ /sys/src/fs/amd64/etherif.c Mon Apr 14 07:22:46 2014 @@ -13,13 +13,13 @@ void etheriq(Ether* ether, Msgbuf* mb) { + mb->next = nil; ilock(&ether->rqlock); if(ether->rqhead) ether->rqtail->next = mb; else ether->rqhead = mb; ether->rqtail = mb; - mb->next = 0; iunlock(&ether->rqlock); wakeup(&ether->rqr); @@ -28,7 +28,41 @@ static int isinput(void* arg) { - return ((Ether*)arg)->rqhead != 0; + return ((Ether*)arg)->rqhead != nil; +} + +static void +recvpkt(Ifc *ifc, Msgbuf *mb) +{ + int cnt; + Enpkt *p; + + p = (Enpkt*)mb->data; + cnt = mb->count; + + switch(nhgets(p->type)){ + case Arptype: + arpreceive(ifc, mb); + break; + case Cectype: + cecreceive(ifc, mb); + break; + case Aoetype: + aoereceive(ifc, mb); + break; + case Iptype: + ipreceive(ifc, mb); + break; + default: + cnt = 0; + mbfree(mb); + break; + } + if(cnt > 0){ + ifc->rxpkt++; + ifc->work.count++; + ifc->rate.count += cnt; + } } static void @@ -36,8 +70,7 @@ { Ether *ether; Ifc *ifc; - Msgbuf *mb; - Enpkt *p; + Msgbuf *mb, *next; ether = u->arg; ifc = &ether->ifc; @@ -53,31 +86,14 @@ continue; } mb = ether->rqhead; - ether->rqhead = mb->next; + ether->rqhead = nil; iunlock(&ether->rqlock); - p = (Enpkt*)mb->data; - switch(nhgets(p->type)){ - case Arptype: - arpreceive(p, mb->count, ifc); - break; - case Cectype: - cecreceive(p, mb->count, ifc); - break; - case Aoetype: - aoereceive(p, mb->count, ifc); - break; - case Iptype: - ipreceive(p, mb->count, ifc); - break; - default: - goto done; + for(; mb != nil; mb = next){ + next = mb->next; + mb->next = nil; + recvpkt(ifc, mb); } - ifc->rxpkt++; - ifc->work.count++; - ifc->rate.count += mb->count; - done: - mbfree(mb); } } @@ -184,32 +200,35 @@ continue; ifc = &etherif[i].ifc; - print("ether stats %d %E\n", etherif[i].ctlrno, etherif[i].ea); - print(" work =%9W pkts\n", &ifc->work); - print(" rate =%9W Bps\n", &ifc->rate); - 
print(" err = %3ld rc %3ld sum\n", ifc->rcverr, ifc->sumerr); + print("ether stats %d %E %I->%I\n", etherif[i].ctlrno, etherif[i].ea, ifc->ipa, ifc->netgate); + print(" work %9W pkts\n", &ifc->work); + print(" rate %9W Bps\n", &ifc->rate); + print(" err tx %3ld rx %3ld sum %3ld\n", ifc->txerr, ifc->rcverr, ifc->sumerr); + print(" pkts tx %3ld rx %3ld\n", ifc->rxpkt, ifc->txpkt); } } void etherstart(void) { + char buf[100], *p; int i; + Ether *e; Ifc *ifc, *tail; - char buf[100], *p; nether = 0; tail = 0; for(i = 0; i < MaxEther; i++){ - if(etherif[i].mbps == 0) + e = etherif + i; + if(e->mbps == 0) continue; - ifc = &etherif[i].ifc; + ifc = &e->ifc; lock(ifc); - getipa(ifc, etherif[i].ctlrno); + getipa(ifc, e->ctlrno); if(!isvalidip(ifc->ipa)){ unlock(ifc); - etherif[i].mbps = 0; + e->mbps = 0; continue; } if(ifc->reply == 0){ @@ -219,10 +238,10 @@ } unlock(ifc); - sprint(etherif[i].oname, "ether%do", etherif[i].ctlrno); - userinit(ethero, etherif+i, etherif[i].oname); - sprint(etherif[i].iname, "ether%di", etherif[i].ctlrno); - userinit(etheri, etherif+i, etherif[i].iname); + snprint(e->oname, sizeof e->oname, "ether%do", e->ctlrno); + userinit(ethero, etherif+i, e->oname); + snprint(e->iname, sizeof e->iname, "ether%di", e->ctlrno); + userinit(etheri, etherif+i, e->iname); ifc->next = nil; if(enets != nil) @@ -235,6 +254,7 @@ if(nether){ cmd_install("state", "-- ether stats", cmd_state); + ipstart(); arpstart(); if(p = getconf("route")){ snprint(buf, sizeof buf, "route %s", p); @@ -278,11 +298,47 @@ return -1; } +static int +etherprobe(Ether *e, int n) +{ + char buf[32]; + int i, ctlrno; + + ctlrno = e - etherif; + dprint("FOUND ether %s\n", etherctlr[n].type); + e->ctlrno = ctlrno; + e->ifc.maxmtu = ETHERMAXTU; + e->tbdf = BUSUNKNOWN; + for(i = 0; i < e->nopt; i++){ + if(strncmp(e->opt[i], "ea=", 3)) + continue; + if(parseether(e->ea, &e->opt[i][3]) == -1) + memset(e->ea, 0, Easize); + } + dprint(" reset ... 
"); + if(etherctlr[n].reset(e)){ + dprint("fail\n"); + return -1; + } + if(e->mbps == 0) + e->mbps = 10; + dprint("okay\n"); + if(e->irq == 2) + e->irq = 9; + memmove(e->ifc.ea, e->ea, sizeof e->ea); + snprint(buf, sizeof buf, "ether%d", ctlrno); + intrenable(e->irq, e->interrupt, e, e->tbdf, buf); + + print("ether%d: %s: %dMbps port %#p irq %d mtu %d: %E\n", + ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu, e->ea); + return 0; +} + void etherinit(void) { - char buf[32]; - int i, n, ctlrno; + char *type; + int n, ctlrno; Ether *e; for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){ @@ -290,35 +346,15 @@ memset(e, 0, sizeof *e); if(!pciconfig("ether", ctlrno, e)) continue; + type = e->type; + if(type[0] == 0) + type = nil; + if(type == nil && e->tbdf == BUSUNKNOWN) + continue; for(n = 0; n < netherctlr; n++){ - if(cistrcmp(etherctlr[n].type, e->type)) + if(type != nil && cistrcmp(etherctlr[n].type, e->type)) continue; - dprint("FOUND ether %s\n", etherctlr[n].type); - e->ctlrno = ctlrno; - e->tbdf = BUSUNKNOWN; - e->ifc.maxmtu = ETHERMAXTU; - for(i = 0; i < e->nopt; i++){ - if(strncmp(e->opt[i], "ea=", 3)) - continue; - if(parseether(e->ea, &e->opt[i][3]) == -1) - memset(e->ea, 0, Easize); - } - dprint(" reset ... 
"); - if(etherctlr[n].reset(e)){ - dprint("fail\n"); - break; - } - dprint("okay\n"); - if(e->irq == 2) - e->irq = 9; - memmove(e->ifc.ea, e->ea, sizeof e->ea); - snprint(buf, sizeof buf, "ether%d", ctlrno); - intrenable(e->irq, e->interrupt, e, e->tbdf, buf); - - print("ether%d: %s: %dMbps port %#p irq %d mtu %d", - ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu); - print(": %E\n", e->ea); - break; + etherprobe(e, n); } } } --- /sys/src/fs/pc/etherif.c Mon Apr 14 07:22:47 2014 +++ /sys/src/fs/pc/etherif.c Mon Apr 14 07:22:48 2014 @@ -13,13 +13,13 @@ void etheriq(Ether* ether, Msgbuf* mb) { + mb->next = nil; ilock(&ether->rqlock); if(ether->rqhead) ether->rqtail->next = mb; else ether->rqhead = mb; ether->rqtail = mb; - mb->next = 0; iunlock(&ether->rqlock); wakeup(&ether->rqr); @@ -28,122 +28,74 @@ static int isinput(void* arg) { - return ((Ether*)arg)->rqhead != 0; + return ((Ether*)arg)->rqhead != nil; } static void -etheri(void) +recvpkt(Ifc *ifc, Msgbuf *mb) { - Ether *ether; - Ifc *ifc; - Msgbuf *mb; + int cnt; Enpkt *p; - ether = u->arg; - ifc = &ether->ifc; - print("ether%di: %E %I\n", ether->ctlrno, ether->ifc.ea, ether->ifc.ipa); - ether->attach(ether); + p = (Enpkt*)mb->data; + cnt = mb->count; - for(;;) { - sleep(&ether->rqr, isinput, ether); - - ilock(&ether->rqlock); - if(ether->rqhead == 0) { - iunlock(&ether->rqlock); - continue; - } - mb = ether->rqhead; - ether->rqhead = mb->next; - iunlock(&ether->rqlock); - - p = (Enpkt*)mb->data; - switch(nhgets(p->type)){ - case Arptype: - arpreceive(p, mb->count, ifc); - break; - case Cectype: - cecreceive(p, mb->count, ifc); - break; - case Aoetype: - aoereceive(p, mb->count, ifc); - break; - case Iptype: - ipreceive(p, mb->count, ifc); - break; - default: - goto done; - } + switch(nhgets(p->type)){ + case Arptype: + arpreceive(ifc, mb); + break; + case Cectype: + cecreceive(ifc, mb); + break; + case Aoetype: + aoereceive(ifc, mb); + break; + case Iptype: + ipreceive(ifc, mb); + break; + default: + cnt = 0; + mbfree(mb); 
+ break; + } + if(cnt > 0){ ifc->rxpkt++; ifc->work.count++; - ifc->rate.count += mb->count; - done: - mbfree(mb); + ifc->rate.count += cnt; } } -#ifdef no static void -ethero(void) +etheri(void) { Ether *ether; Ifc *ifc; - Msgbuf *mb; - int len; + Msgbuf *mb, *next; ether = u->arg; ifc = &ether->ifc; - print("ether%do: %E %I\n", ether->ctlrno, ifc->ea, ifc->ipa); + print("ether%di: %E %I\n", ether->ctlrno, ether->ifc.ea, ether->ifc.ipa); + ether->attach(ether); for(;;) { - mb = recv(ifc->reply, 1); - if(mb == nil) - continue; + sleep(&ether->rqr, isinput, ether); - len = mb->count; - if(len > ether->ifc.maxmtu){ - print("ether%do: pkt too big - %d\n", ether->ctlrno, len); - mbfree(mb); + ilock(&ether->rqlock); + if(ether->rqhead == 0) { + iunlock(&ether->rqlock); continue; } - if(len < ETHERMINTU) { - memset(mb->data+len, 0, ETHERMINTU-len); - mb->count = len = ETHERMINTU; - } - memmove(((Enpkt*)(mb->data))->s, ifc->ea, sizeof(ifc->ea)); - - ilock(&ether->tqlock); - if(ether->tqhead) - ether->tqtail->next = mb; - else - ether->tqhead = mb; - ether->tqtail = mb; - mb->next = 0; - iunlock(&ether->tqlock); - - ether->transmit(ether); - - ifc->work.count++; - ifc->rate.count += len; - ifc->txpkt++; - } -} - -Msgbuf* -etheroq(Ether* ether) -{ - Msgbuf *mb; + mb = ether->rqhead; + ether->rqhead = nil; + iunlock(&ether->rqlock); - mb = nil; - ilock(&ether->tqlock); - if(ether->tqhead){ - mb = ether->tqhead; - ether->tqhead = mb->next; + for(; mb != nil; mb = next){ + next = mb->next; + mb->next = nil; + recvpkt(ifc, mb); + } } - iunlock(&ether->tqlock); - - return mb; } -#endif /* * look, ma. no extra queue. @@ -154,7 +106,14 @@ Ether *e; e = u->arg; - print("ether%do: %E %I\n", e->ctlrno, e->ifc.ea, e->ifc.ipa); + print("ether%do: %E %I\n", e->ctlrno, e->ifc.ea, e->ifc.ipa); + + /* should not be necessary; scheduling sucks */ + if(e->transmit == nil){ + static Rendez r[MaxEther]; + for(;;) + tsleep(r + e->ctlrno, no, 0, 10000); + } for(;;){ recv(e->ifc.reply, 0); // wait for something to do. 
@@ -195,6 +154,41 @@ return m; } +Msgbuf* +etheroq1(Ether* e, int ret) +{ + Msgbuf *m; + Enpkt *p; + Ifc *f; + int len; + + f = &e->ifc; +loop: + if(ret){ + if(f->reply->count == 0) + return 0; + } + m = recv(f->reply, 1); + len = m->count; + if(len > f->maxmtu){ + print("ether%do: pkt too big - %d\n", e->ctlrno, len); + mbfree(m); + goto loop; + } + if(len < ETHERMINTU){ + memset(m->data+len, 0, ETHERMINTU-len); + m->count = len = ETHERMINTU; + } + p = (Enpkt*)m->data; + memmove(p->s, f->ea, sizeof f->ea); + + f->work.count++; + f->rate.count += len; + f->txpkt++; + + return m; +} + static void cmd_state(int, char*[]) { @@ -206,10 +200,11 @@ continue; ifc = &etherif[i].ifc; - print("ether stats %d %E\n", etherif[i].ctlrno, etherif[i].ea); - print(" work =%9W pkts\n", &ifc->work); - print(" rate =%9W Bps\n", &ifc->rate); - print(" err = %3ld rc %3ld sum\n", ifc->rcverr, ifc->sumerr); + print("ether stats %d %E %I->%I\n", etherif[i].ctlrno, etherif[i].ea, ifc->ipa, ifc->netgate); + print(" work %9W pkts\n", &ifc->work); + print(" rate %9W Bps\n", &ifc->rate); + print(" err tx %3ld rx %3ld sum %3ld\n", ifc->txerr, ifc->rcverr, ifc->sumerr); + print(" pkts tx %3ld rx %3ld\n", ifc->rxpkt, ifc->txpkt); } } @@ -257,6 +252,7 @@ if(nether){ cmd_install("state", "-- ether stats", cmd_state); + ipstart(); arpstart(); if(p = getconf("route")){ snprint(buf, sizeof buf, "route %s", p); @@ -290,47 +286,71 @@ return 0; } +int +ethercfgmatch(Ether *e, Pcidev *p, uintmem port) +{ + if(e->port == 0 || e->port == port) + if(e->tbdf == BUSUNKNOWN || p == nil || e->tbdf == p->tbdf){ + return 0; + } + return -1; +} + +static int +etherprobe(Ether *e, int n) +{ + char buf[32]; + int i, ctlrno; + + ctlrno = e - etherif; + dprint("FOUND ether %s\n", etherctlr[n].type); + e->ctlrno = ctlrno; + e->mbps = 10; + e->ifc.maxmtu = ETHERMAXTU; + for(i = 0; i < e->nopt; i++){ + if(strncmp(e->opt[i], "ea=", 3)) + continue; + if(parseether(e->ea, &e->opt[i][3]) == -1) + memset(e->ea, 0, Easize); + } + 
dprint(" reset ... "); + if(etherctlr[n].reset(e)){ + dprint("fail\n"); + return -1; + } + dprint("okay\n"); + if(e->irq == 2) + e->irq = 9; + memmove(e->ifc.ea, e->ea, sizeof e->ea); + snprint(buf, sizeof buf, "ether%d", ctlrno); + intrenable(e->irq, e->interrupt, e, e->tbdf, buf); + + print("ether%d: %s: %dMbps port %#p irq %d mtu %d: %E\n", + ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu, e->ea); + return 0; +} + void etherinit(void) { + char *type; + int n, ctlrno; Ether *e; - int i, n, ctlrno; for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){ e = etherif+ctlrno; memset(e, 0, sizeof *e); - if(!isaconfig("ether", ctlrno, e)){ - dprint("%d: !isaconfig\n", ctlrno); + if(!isaconfig("ether", ctlrno, e)) + continue; + type = e->type; + if(type[0] == 0) + type = nil; + if(type == nil && e->tbdf == BUSUNKNOWN) continue; - } for(n = 0; n < netherctlr; n++){ - if(cistrcmp(etherctlr[n].type, e->type)) + if(type != nil && cistrcmp(etherctlr[n].type, e->type)) continue; - dprint("%d: FOUND ether %s\n", ctlrno, etherctlr[n].type); - e->ctlrno = ctlrno; - e->tbdf = BUSUNKNOWN; - e->ifc.maxmtu = ETHERMAXTU; - for(i = 0; i < e->nopt; i++){ - if(strncmp(e->opt[i], "ea=", 3)) - continue; - if(parseether(e->ea, &e->opt[i][3]) == -1) - memset(e->ea, 0, Easize); - } - dprint(" reset ... 
"); - if(etherctlr[n].reset(e)){ - dprint("fail\n"); - break; - } - dprint("okay\n"); - if(e->irq == 2) - e->irq = 9; - setvec(Int0vec + e->irq, e->interrupt, e); - memmove(e->ifc.ea, e->ea, sizeof e->ea); - - print("ether%d: %s: %dMbps port %#p irq %ld mtu %d", - ctlrno, e->type, e->mbps, e->port, e->irq, e->ifc.maxmtu); - print(": %E\n", e->ea); - break; + etherprobe(e, n); } } } --- /sys/src/fs/pc/etherif.h Mon Apr 14 07:22:49 2014 +++ /sys/src/fs/pc/etherif.h Mon Apr 14 07:22:50 2014 @@ -7,7 +7,6 @@ int ctlrno; char iname[NAMELEN]; char oname[NAMELEN]; - int tbdf; /* type+busno+devno+funcno */ int mbps; /* Mbps */ uchar ea[Easize]; @@ -34,6 +33,10 @@ int (*reset)(Ether*); }; +enum { + MaxEther = 8, +}; + extern Etherctlr etherctlr[]; extern int netherctlr; extern Ether etherif[MaxEther]; @@ -41,6 +44,7 @@ void etheriq(Ether*, Msgbuf*); Msgbuf *etheroq(Ether*); +int ethercfgmatch(Ether*, Pcidev*, uintmem); int etherga620reset(Ether*); int ether21140reset(Ether*);