1. add mss to the tcp status file. 2. try a little harder to give a correct mss in tcp/stats. this will still be wrong for stacks with >1 ifc with differing mss, and of course there is no mib defined for the tcp6 mss. 3. set the mss and cwind in procsyn() to the smaller of our own interface mss and the mss the sender requested, and send our desired mss in tcpsndsyn(). do not process the mss option in tcpincoming, as changing the mss is not allowed here. 4. add a little hint where icmp "must frag" messages should be processed. there are a number of reasons we can't do this without some additional rework, including that ipoput4 eats the IP_DF flag. the next step here is to add some logging in the tcp stack to see if we're mis-setting frag[0] anywhere. Reference: /n/atom/patch/tcpmss Date: Sun May 10 20:42:45 CES 2015 Signed-off-by: quanstro@quanstro.net --- /sys/src/nix/ip/tcp.c Sun May 10 20:36:11 2015 +++ /sys/src/nix/ip/tcp.c Sun May 10 20:36:14 2015 @@ -491,11 +491,12 @@ s = (Tcpctl*)(c->ptcl); return snprint(state, n, - "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + "%s qin %d qout %d rq %d.%d mss %d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", tcpstates[s->state], c->rq ? qlen(c->rq) : 0, c->wq ? 
qlen(c->wq) : 0, s->nreseq, s->reseqlen, + s->mss, s->srtt, s->mdev, s->ssthresh, s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, s->qscale, @@ -843,23 +844,27 @@ /* mtu (- TCP + IP hdr len) of 1st hop */ static int -tcpmtu(Proto *tcp, uchar *addr, int version, uint *scale) +tcpmtu(Proto *tcp, uchar *addr, int version, uint reqmss, uint *scale) { + Tcppriv *tpriv; Ipifc *ifc; int mtu; ifc = findipifc(tcp->f, addr, 0); + tpriv = tcp->priv; switch(version){ default: case V4: mtu = DEF_MSS; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE); + tpriv->stats[Mss] = mtu; /* nope -- we can have >1 ifc per ip stack */ break; case V6: mtu = DEF_MSS6; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE); + tpriv->stats[Mss] = mtu + (TCP6_PKT + TCP6_HDRSIZE) - (TCP4_PKT + TCP4_HDRSIZE); break; } /* @@ -868,6 +873,10 @@ */ *scale = Defadvscale; + /* our sending max segment size cannot be bigger than what he asked for */ + if(reqmss != 0 && reqmss < mtu) + mtu = reqmss; + return mtu; } @@ -878,7 +887,6 @@ Tcp4hdr* h4; Tcp6hdr* h6; Tcppriv *tpriv; - int mss; tcb = (Tcpctl*)s->ptcl; @@ -900,8 +908,6 @@ tcb->katimer.func = tcpkeepalive; tcb->katimer.arg = s; - mss = DEF_MSS; - /* create a prototype(pseudo) header */ if(mode != TCP_LISTEN){ if(ipcmp(s->laddr, IPnoaddr) == 0) @@ -925,14 +931,13 @@ hnputs(h6->tcpdport, s->rport); ipmove(h6->tcpsrc, s->laddr); ipmove(h6->tcpdst, s->raddr); - mss = DEF_MSS6; break; default: panic("inittcpctl: version %d", s->ipversion); } } - tcb->mss = tcb->cwind = mss; + tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale); tcb->abcbytes = 0; tpriv = s->p->priv; tpriv->stats[Mss] = tcb->mss; @@ -1300,8 +1305,6 @@ static void tcpsndsyn(Conv *s, Tcpctl *tcb) { - Tcppriv *tpriv; - tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); tcb->rttseq = tcb->iss; tcb->snd.wl2 = tcb->iss; @@ -1314,9 +1317,7 @@ tcb->sndsyntime = NOW; /* set desired mss and scale */ - tcb->mss = 
tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale); } void @@ -1492,7 +1493,7 @@ seg.ack = lp->irs+1; seg.flags = SYN|ACK; seg.urg = 0; - seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, 0, &scale); /* our mss, not lp->mss */ seg.wnd = QMAX; /* if the other side set scale, we should too */ @@ -1767,11 +1768,8 @@ tcb->flgcnt = 0; tcb->flags |= SYNACK; - /* our sending max segment size cannot be bigger than what he asked for */ - if(lp->mss != 0 && lp->mss < tcb->mss) { - tcb->mss = lp->mss; - tpriv->stats[Mss] = tcb->mss; - } + /* per rfc, we can't set the mss anymore */ +// tcb->mss = tcb->cwind= tcpmtu(s->p, lp->laddr, lp->version, lp->mss, &tcb->scale); /* window scaling */ tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); @@ -3014,7 +3012,6 @@ procsyn(Conv *s, Tcp *seg) { Tcpctl *tcb; - Tcppriv *tpriv; tcb = (Tcpctl*)s->ptcl; tcb->flags |= FORCE; @@ -3026,11 +3023,7 @@ tcb->irs = seg->seq; /* our sending max segment size cannot be bigger than what he asked for */ - if(seg->mss != 0 && seg->mss < tcb->mss) { - tcb->mss = seg->mss; - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; - } + tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion, seg->mss, &tcb->scale); tcb->snd.wnd = seg->wnd; initialwindow(tcb); @@ -3246,7 +3239,6 @@ tcb = (Tcpctl*)s->ptcl; if(s->rport == pdest) if(s->lport == psource) - if(tcb->state != Closed) if(ipcmp(s->raddr, dest) == 0) if(ipcmp(s->laddr, source) == 0){ qlock(s); @@ -3254,6 +3246,12 @@ switch(tcb->state){ case Syn_sent: localclose(s, msg); + break; + case Established: + netlog(tcp->f, Logtcp, "tcpadvise: %I:%d → %I:%d %s\n", + source, psource, dest, pdest, msg); + if(strstr(msg, "DF set") != nil){ + } break; } qunlock(s); --- /sys/src/9/ip/tcp.c Sun May 10 20:36:19 2015 +++ /sys/src/9/ip/tcp.c Sun May 10 20:36:21 2015 @@ -491,11 +491,12 @@ s = 
(Tcpctl*)(c->ptcl); return snprint(state, n, - "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + "%s qin %d qout %d rq %d.%d mss %d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", tcpstates[s->state], c->rq ? qlen(c->rq) : 0, c->wq ? qlen(c->wq) : 0, s->nreseq, s->reseqlen, + s->mss, s->srtt, s->mdev, s->ssthresh, s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, s->qscale, @@ -843,23 +844,27 @@ /* mtu (- TCP + IP hdr len) of 1st hop */ static int -tcpmtu(Proto *tcp, uchar *addr, int version, uint *scale) +tcpmtu(Proto *tcp, uchar *addr, int version, uint reqmss, uint *scale) { + Tcppriv *tpriv; Ipifc *ifc; int mtu; ifc = findipifc(tcp->f, addr, 0); + tpriv = tcp->priv; switch(version){ default: case V4: mtu = DEF_MSS; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE); + tpriv->stats[Mss] = mtu; /* nope -- we can have >1 ifc per ip stack */ break; case V6: mtu = DEF_MSS6; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE); + tpriv->stats[Mss] = mtu + (TCP6_PKT + TCP6_HDRSIZE) - (TCP4_PKT + TCP4_HDRSIZE); break; } /* @@ -868,6 +873,10 @@ */ *scale = Defadvscale; + /* our sending max segment size cannot be bigger than what he asked for */ + if(reqmss != 0 && reqmss < mtu) + mtu = reqmss; + return mtu; } @@ -878,7 +887,6 @@ Tcp4hdr* h4; Tcp6hdr* h6; Tcppriv *tpriv; - int mss; tcb = (Tcpctl*)s->ptcl; @@ -900,8 +908,6 @@ tcb->katimer.func = tcpkeepalive; tcb->katimer.arg = s; - mss = DEF_MSS; - /* create a prototype(pseudo) header */ if(mode != TCP_LISTEN){ if(ipcmp(s->laddr, IPnoaddr) == 0) @@ -925,14 +931,13 @@ hnputs(h6->tcpdport, s->rport); ipmove(h6->tcpsrc, s->laddr); ipmove(h6->tcpdst, s->raddr); - mss = DEF_MSS6; break; default: panic("inittcpctl: version %d", 
s->ipversion); } } - tcb->mss = tcb->cwind = mss; + tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale); tcb->abcbytes = 0; tpriv = s->p->priv; tpriv->stats[Mss] = tcb->mss; @@ -1300,8 +1305,6 @@ static void tcpsndsyn(Conv *s, Tcpctl *tcb) { - Tcppriv *tpriv; - tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); tcb->rttseq = tcb->iss; tcb->snd.wl2 = tcb->iss; @@ -1314,9 +1317,7 @@ tcb->sndsyntime = NOW; /* set desired mss and scale */ - tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale); } void @@ -1492,7 +1493,7 @@ seg.ack = lp->irs+1; seg.flags = SYN|ACK; seg.urg = 0; - seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, 0, &scale); /* our mss, not lp->mss */ seg.wnd = QMAX; /* if the other side set scale, we should too */ @@ -1767,11 +1768,8 @@ tcb->flgcnt = 0; tcb->flags |= SYNACK; - /* our sending max segment size cannot be bigger than what he asked for */ - if(lp->mss != 0 && lp->mss < tcb->mss) { - tcb->mss = lp->mss; - tpriv->stats[Mss] = tcb->mss; - } + /* per rfc, we can't set the mss anymore */ +// tcb->mss = tcb->cwind= tcpmtu(s->p, lp->laddr, lp->version, lp->mss, &tcb->scale); /* window scaling */ tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); @@ -3014,7 +3012,6 @@ procsyn(Conv *s, Tcp *seg) { Tcpctl *tcb; - Tcppriv *tpriv; tcb = (Tcpctl*)s->ptcl; tcb->flags |= FORCE; @@ -3026,11 +3023,7 @@ tcb->irs = seg->seq; /* our sending max segment size cannot be bigger than what he asked for */ - if(seg->mss != 0 && seg->mss < tcb->mss) { - tcb->mss = seg->mss; - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; - } + tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion, seg->mss, &tcb->scale); tcb->snd.wnd = seg->wnd; initialwindow(tcb); @@ -3246,7 +3239,6 @@ tcb = (Tcpctl*)s->ptcl; if(s->rport == pdest) if(s->lport == psource) - if(tcb->state != 
Closed) if(ipcmp(s->raddr, dest) == 0) if(ipcmp(s->laddr, source) == 0){ qlock(s); @@ -3254,6 +3246,12 @@ switch(tcb->state){ case Syn_sent: localclose(s, msg); + break; + case Established: + netlog(tcp->f, Logtcp, "tcpadvise: %I:%d → %I:%d %s\n", + source, psource, dest, pdest, msg); + if(strstr(msg, "DF set") != nil){ + } break; } qunlock(s);