Motivation: While developing a discovery protocol, I required the ability to have multiple clients/servers running on the same machine. Other operating systems package this as SO_REUSE*. This patch allows multiple clients to receive UDP packets destined for the same address/port. When announcing, clients can specify the reuse flag 'announce 5050 reuse'. If all prior clients specified the reuse flag, then the announce will succeed and the client will receive a copy of all packets. Once an address/port is announced with reuse, all subsequent clients must also specify reuse. Notes: Tue Jul 13 12:44:42 EDT 2010 geoff The original intent of SO_REUSE was to allow the reuse of a TCP port before the FIN_WAIT timeouts had happened, so that daemon could be restarted without having to wait several minutes. This use of it with UDP sounds dubious and ill-advised. Why not just use different port numbers if you need multiple server instances? Reference: /n/sources/patch/sorry/udp-reuse Date: Tue Jul 13 17:55:44 CES 2010 Signed-off-by: jrw972@gmail.com Reviewed-by: geoff --- /sys/src/9/port/portfns.h Tue Jul 13 17:40:24 2010 +++ /sys/src/9/port/portfns.h Tue Jul 13 17:40:22 2010 @@ -45,6 +45,7 @@ void copen(Chan*); Block* concatblock(Block*); Block* copyblock(Block*, int); +Block* copyblist(Block*); void copypage(Page*, Page*); void countpagerefs(ulong*, int); int cread(Chan*, uchar*, int, vlong); --- /sys/src/9/port/qio.c Tue Jul 13 17:40:26 2010 +++ /sys/src/9/port/qio.c Tue Jul 13 17:40:25 2010 @@ -84,6 +84,31 @@ } /* + * copy a list of blocks + */ +Block* +copyblist(Block *bp) +{ + int len; + Block *nb; + Block **ptr; + + if(bp == nil) + return nil; + + ptr = &nb; + for(; bp; bp = bp->next) { + len = BLEN(bp); + *ptr = allocb(len); + memmove((*ptr)->wp, bp->rp, len); + (*ptr)->wp += len; + ptr = &(*ptr)->next; + } + + return nb; +} + +/* * pad a block to the front (or the back if size is negative) */ Block* --- /sys/src/9/ip/ip.h Tue Jul 13 17:40:28 2010 +++ /sys/src/9/ip/ip.h Tue Jul 13 17:40:27 2010 @@ -197,6 +197,7 @@ /* udp specific */ int headers; /* data src/dst headers in udp */ int reliable; /* true if reliable udp */ + int reuse; /* true if others can reuse address */ Conv* incall; /* calls waiting to be listened for */ Conv* next; @@ -376,6 +377,7 @@ }; void iphtadd(Ipht*, Conv*); void iphtrem(Ipht*, Conv*); +Conv* iphtlookn(Ipht *ht, uint cidx, uchar *sa, ushort sp, uchar *da, ushort dp); Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp); /* --- /sys/src/9/ip/devip.c Tue Jul 13 17:40:31 2010 +++ /sys/src/9/ip/devip.c Tue Jul 13 17:40:29 2010 @@ -756,7 +756,8 @@ && xp->lport == lport && xp->rport == c->rport && ipcmp(xp->raddr, c->raddr) == 0 - && ipcmp(xp->laddr, c->laddr) == 0){ + && ipcmp(xp->laddr, c->laddr) == 0 + && (!xp->reuse || !c->reuse) ){ qunlock(p); return "address in use"; } --- /sys/src/9/ip/ipaux.c Tue Jul 13 17:40:33 2010 +++ /sys/src/9/ip/ipaux.c Tue Jul 13 17:40:32 2010 @@ -298,7 +298,7 @@ * announced && *,* */ Conv* -iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +iphtlookn(Ipht *ht, uint cidx, uchar *sa, ushort sp, uchar *da, ushort dp) { ulong hv; Iphash *h; @@ -313,8 +313,12 @@ c = h->c; if(sp == c->rport && dp == c->lport && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; + if(cidx == 0) { + unlock(ht); + return c; + } + else + --cidx; } } @@ -325,8 +329,12 @@ continue; c = h->c; if(dp == c->lport && ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; + if(cidx == 0) { + unlock(ht); + return c; + } + else + --cidx; } } @@ -337,8 +345,12 @@ continue; c = h->c; if(dp == c->lport){ - unlock(ht); - return c; + if(cidx == 0) { + unlock(ht); + return c; + } + else + --cidx; } } @@ -349,8 +361,12 @@ continue; c = h->c; if(ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; + if(cidx == 0) { + unlock(ht); + return c; + } + else + --cidx; } } @@ -360,9 +376,19 @@ if(h->match != IPmatchany) continue; c = h->c; - unlock(ht); - return c; + if(cidx == 0) { + unlock(ht); + return c; + } + else + --cidx; } unlock(ht); return nil; } + +Conv* +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +{ + return iphtlookn(ht, 0, sa, sp, da, dp); +} --- /sys/src/9/ip/udp.c Tue Jul 13 17:40:34 2010 +++ /sys/src/9/ip/udp.c Tue Jul 13 17:40:33 2010 @@ -138,6 +138,11 @@ Udppriv *upriv; upriv = c->p->priv; + /* Set reuse. */ + if(argc == 3 && 0 == strcmp(argv[2], "reuse")) { + argc = 2; + c->reuse = 1; + } e = Fsstdannounce(c, argv, argc); if(e != nil) return e; @@ -322,9 +327,9 @@ } void -udpiput(Proto *udp, Ipifc *ifc, Block *bp) +udpiput(Proto *udp, Ipifc *ifc, Block *bptr) { - int len; + int len, tlen; Udp4hdr *uh4; Udp6hdr *uh6; Conv *c; @@ -336,12 +341,14 @@ int version; int ottl, oviclfl, olen; uchar *p; + uint cidx; + Block* bp; upriv = udp->priv; f = udp->f; upriv->ustats.udpInDatagrams++; - uh4 = (Udp4hdr*)(bp->rp); + uh4 = (Udp4hdr*)(bptr->rp); version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4; /* Put back pseudo header for checksum @@ -360,11 +367,11 @@ rport = nhgets(uh4->udpsport); if(nhgets(uh4->udpcksum)) { - if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) { + if(ptclcsum(bptr, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) { upriv->ustats.udpInErrors++; netlog(f, Logudp, "udp: checksum error %I\n", raddr); DPRINT("udp: checksum error %I\n", raddr); - freeblist(bp); + freeblist(bptr); return; } } @@ -372,7 +379,7 @@ hnputs(uh4->udpplen, olen); break; case V6: - uh6 = (Udp6hdr*)(bp->rp); + uh6 = (Udp6hdr*)(bptr->rp); len = nhgets(uh6->udplen); oviclfl = nhgetl(uh6->viclfl); olen = nhgets(uh6->len); @@ -384,11 +391,11 @@ memset(uh6, 0, 8); hnputl(uh6->viclfl, len); uh6->hoplimit = IP_UDPPROTO; - if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) { + if(ptclcsum(bptr, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) { upriv->ustats.udpInErrors++; netlog(f, Logudp, "udp: checksum error %I\n", raddr); DPRINT("udp: checksum error %I\n", raddr); - freeblist(bp); + freeblist(bptr); return; } hnputl(uh6->viclfl, oviclfl); @@ -401,114 +408,120 @@ return; /* to avoid a warning */ } - qlock(udp); - - c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); - if(c == nil){ - /* no conversation found */ - upriv->ustats.udpNoPorts++; - qunlock(udp); - netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, - laddr, lport); + for(cidx = 0; ; ++cidx) { - switch(version){ - case V4: - icmpnoconv(f, bp); - break; - case V6: - icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0); - break; - default: - panic("udpiput2: version %d", version); - } - - freeblist(bp); - return; - } - ucb = (Udpcb*)c->ptcl; + bp = copyblist(bptr); - if(c->state == Announced){ - if(ucb->headers == 0){ - /* create a new conversation */ - if(ipforme(f, laddr) != Runi) { + qlock(udp); + c = iphtlookn(&upriv->ht, cidx, raddr, rport, laddr, lport); + if(c == nil){ + /* no conversation found */ + if(cidx == 0) + upriv->ustats.udpNoPorts++; + qunlock(udp); + if(cidx == 0){ + netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, + laddr, lport); + switch(version){ case V4: - v4tov6(laddr, ifc->lifc->local); + icmpnoconv(f, bp); break; case V6: - ipmove(laddr, ifc->lifc->local); + icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0); break; default: - panic("udpiput3: version %d", version); + panic("udpiput2: version %d", version); } } - c = Fsnewcall(c, raddr, rport, laddr, lport, version); - if(c == nil){ - qunlock(udp); - freeblist(bp); - return; - } - iphtadd(&upriv->ht, c); - ucb = (Udpcb*)c->ptcl; + freeblist(bp); + break; /*return;*/ } - } + ucb = (Udpcb*)c->ptcl; - qlock(c); - qunlock(udp); - - /* - * Trim the packet down to data size - */ - len -= UDP_UDPHDR_SZ; - switch(version){ - case V4: - bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len); - break; - case V6: - bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len); - break; - default: - bp = nil; - panic("udpiput4: version %d", version); - } - if(bp == nil){ - qunlock(c); - netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport, - laddr, lport); - upriv->lenerr++; - return; - } - - netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport, - laddr, lport, len); - - switch(ucb->headers){ - case 7: - /* pass the src address */ - bp = padblock(bp, UDP_USEAD7); - p = bp->rp; - ipmove(p, raddr); p += IPaddrlen; - ipmove(p, laddr); p += IPaddrlen; - ipmove(p, ifc->lifc->local); p += IPaddrlen; - hnputs(p, rport); p += 2; - hnputs(p, lport); - break; - } - - if(bp->next) - bp = concatblock(bp); - - if(qfull(c->rq)){ + if(c->state == Announced){ + if(ucb->headers == 0){ + /* create a new conversation */ + if(ipforme(f, laddr) != Runi) { + switch(version){ + case V4: + v4tov6(laddr, ifc->lifc->local); + break; + case V6: + ipmove(laddr, ifc->lifc->local); + break; + default: + panic("udpiput3: version %d", version); + } + } + c = Fsnewcall(c, raddr, rport, laddr, lport, version); + if(c == nil){ + qunlock(udp); + freeblist(bp); + continue; /*return;*/ + } + iphtadd(&upriv->ht, c); + ucb = (Udpcb*)c->ptcl; + } + } + + qlock(c); + qunlock(udp); + + /* + * Trim the packet down to data size + */ + tlen = len - UDP_UDPHDR_SZ; + switch(version){ + case V4: + bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, tlen); + break; + case V6: + bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, tlen); + break; + default: + bp = nil; + panic("udpiput4: version %d", version); + } + if(bp == nil){ + qunlock(c); + netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + upriv->lenerr++; + break; /*return;*/ + } + + netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport, + laddr, lport, tlen); + + switch(ucb->headers){ + case 7: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD7); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, laddr); p += IPaddrlen; + ipmove(p, ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + } + + if(bp->next) + bp = concatblock(bp); + + if(qfull(c->rq)){ + qunlock(c); + netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + freeblist(bp); + continue; /*return;*/ + } + + qpass(c->rq, bp); qunlock(c); - netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport, - laddr, lport); - freeblist(bp); - return; } - - qpass(c->rq, bp); - qunlock(c); - + freeblist(bptr); } char* --- /sys/man/3/ip Tue Jul 13 17:40:37 2010 +++ /sys/man/3/ip Tue Jul 13 17:40:35 2010 @@ -600,7 +600,7 @@ The connect fails if the combination of local and remote address/port pairs are already assigned to another port. .TP -.BI announce\ X +.BI announce\ "X [ reuse ]" .I X is a decimal port number or .LR * . @@ -617,7 +617,7 @@ calls for any port that no process has explicitly announced. The local IP address cannot be set. .B Announce -fails if the connection is already announced or connected. +fails if the connection is already announced or connected except in the case of reuse (see UDP below). .TP .BI bind\ X .I X @@ -768,6 +768,30 @@ each datagram coming from a different remote address/port pair establishes a new incoming connection. However, many-to-one semantics is also possible. + +.PP +The +.BR reuse +flag of +.BR announce +allows multiple clients to receive datagrams destined for the same address/port pair. +In order to +.BR announce +with the +.BR reuse +flag, all previous clients using the same address/port must specify the +.BR reuse +flag. +Once an address/port has been +.BR announced +with +.BR reuse , +subsequent +.BR announce +attempts will fail without the +.BR reuse +flag. + .PP If, after an .BR announce ,