From d2a7d886624c56673a6d7ba7d6a7958d2be5b867 Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Sat, 12 Mar 2022 20:53:17 +0000 Subject: [PATCH] devip: implement network address translation routes This adds a new route "t"-flag that enables network address translation, replacing the source address (and local port) of a forwarded packet with one of the outgoing interface. The state for a translation is kept in a new Translation structure, which contains two Iphash entries, so it can be inserted into the per protocol 4-tuple hash table, requiring no extra lookups. Translations have a low overhead (~200 bytes on amd64), so we can have many of them. They get reused after 5 minutes of inactivity or when the per protocol limit of 1000 entries is reached (then the one with the longest inactivity is reused). The protocol needs to export a "forward" function that is responsible for modifying the forwarded packet, and then handle translations in its input function for iphash hits with Iphash.trans != 0. This patch also fixes a few minor things found during development: - Include the Iphash in the Conv structure, avoiding extra malloc - Fix ttl exceeded check (ttl < 1 -> ttl <= 1) - Router should not reply with ttl exceeded for multicast flows - Extra checks for icmp advice to avoid protocol confusions. 
--- sys/man/3/ip | 7 +- sys/src/9/ip/devip.c | 70 +++++-- sys/src/9/ip/icmp.c | 189 +++++++++++++---- sys/src/9/ip/icmp6.c | 6 +- sys/src/9/ip/il.c | 9 +- sys/src/9/ip/ip.c | 40 +++- sys/src/9/ip/ip.h | 125 ++++++++---- sys/src/9/ip/ipaux.c | 450 +++++++++++++++++++++++++++++------------ sys/src/9/ip/ipifc.c | 15 -- sys/src/9/ip/iproute.c | 7 + sys/src/9/ip/ipv6.c | 5 +- sys/src/9/ip/netlog.c | 1 + sys/src/9/ip/rudp.c | 22 +- sys/src/9/ip/tcp.c | 174 ++++++++++------ sys/src/9/ip/udp.c | 126 +++++++++--- 15 files changed, 890 insertions(+), 356 deletions(-) diff --git a/sys/man/3/ip b/sys/man/3/ip index c1d3110e5..14c630d0a 100644 --- a/sys/man/3/ip +++ b/sys/man/3/ip @@ -411,6 +411,9 @@ multicast route .TP .B p point-to-point route +.TP +.B t +network address translation on source .PD .PP The tag is an arbitrary, up to 4 character, string. It is normally used to @@ -442,7 +445,7 @@ with all subsequent routes added via this file descriptor. .TP .BI add\ "target mask nexthop tag interface source smask" .TP -.BI add\ "target mask nexthop type tag interface source smask" +.BI add\ "target mask nexthop flags tag interface source smask" Add the route to the table. If one already exists with the same target and mask, replace it. The .I interface @@ -461,7 +464,7 @@ IP address on the desired interface. .TP .BI remove\ "target mask nexthop tag interface source smask" .TP -.BI remove\ "target mask nexthop type tag interface source smask" +.BI remove\ "target mask nexthop flags tag interface source smask" Remove the matching route. . 
.SS "Address resolution diff --git a/sys/src/9/ip/devip.c b/sys/src/9/ip/devip.c index 403faa083..cb6be1d1c 100644 --- a/sys/src/9/ip/devip.c +++ b/sys/src/9/ip/devip.c @@ -737,6 +737,7 @@ setladdr(Conv* c) char* setluniqueport(Conv* c, int lport) { + Translation *q; Proto *p; Conv *xp; int x; @@ -754,14 +755,22 @@ setluniqueport(Conv* c, int lport) && xp->lport == lport && xp->rport == c->rport && ipcmp(xp->raddr, c->raddr) == 0 - && ipcmp(xp->laddr, c->laddr) == 0){ - qunlock(p); - return "address in use"; - } + && ipcmp(xp->laddr, c->laddr) == 0) + goto Inuse; + } + for(q = p->translations; q != nil; q = q->next){ + if(q->backward.lport == lport + && q->backward.rport == c->rport + && ipcmp(q->backward.raddr, c->raddr) == 0 + && ipcmp(q->backward.laddr, c->laddr) == 0) + goto Inuse; } c->lport = lport; qunlock(p); return nil; +Inuse: + qunlock(p); + return "address in use"; } /* @@ -770,18 +779,51 @@ setluniqueport(Conv* c, int lport) static int lportinuse(Proto *p, ushort lport) { + Translation *q; int x; for(x = 0; x < p->nc && p->conv[x]; x++) if(p->conv[x]->lport == lport) return 1; + for(q = p->translations; q != nil; q = q->next) + if(q->backward.lport == lport) + return 1; return 0; } +/* + * find an unused local port for a protocol. + * + * p needs to be locked + */ +int +unusedlport(Proto *p) +{ + ushort port; + int i; + + /* + * Unrestricted ports are chosen randomly + * between 2^15 and 2^16. There are at most + * 4*Nchan = 4096 ports in use at any given time, + * so even in the worst case, a random probe has a + * 1 - 4096/2^15 = 87% chance of success. + * If 64 successive probes fail, there is a bug somewhere + * (or a once in 10^58 event has happened, but that's + * less likely than a venti collision). 
+ */ + for(i=0; i<64; i++){ + port = (1<<15) + nrand(1<<15); + if(!lportinuse(p, port)) + return port; + } + return -1; +} + /* * pick a local port and set it */ -char * +static char * setlport(Conv* c) { Proto *p; @@ -799,21 +841,9 @@ setlport(Conv* c) goto chosen; } }else{ - /* - * Unrestricted ports are chosen randomly - * between 2^15 and 2^16. There are at most - * 4*Nchan = 4096 ports in use at any given time, - * so even in the worst case, a random probe has a - * 1 - 4096/2^15 = 87% chance of success. - * If 64 successive probes fail, there is a bug somewhere - * (or a once in 10^58 event has happened, but that's - * less likely than a venti collision). - */ - for(i=0; i<64; i++){ - port = (1<<15) + nrand(1<<15); - if(!lportinuse(p, port)) - goto chosen; - } + port = unusedlport(p); + if(port > 0) + goto chosen; } qunlock(p); return "no ports available"; diff --git a/sys/src/9/ip/icmp.c b/sys/src/9/ip/icmp.c index b17054c4c..526540af3 100644 --- a/sys/src/9/ip/icmp.c +++ b/sys/src/9/ip/icmp.c @@ -99,6 +99,8 @@ struct Icmppriv /* message counts */ ulong in[Maxtype+1]; ulong out[Maxtype+1]; + + Ipht ht; }; static void icmpkick(void *x, Block*); @@ -192,9 +194,9 @@ ip4reply(Fs *f, uchar ip4[4]) uchar addr[IPaddrlen]; int i; - v4tov6(addr, ip4); - if(ipismulticast(addr)) + if(isv4mcast(ip4)) return 0; + v4tov6(addr, ip4); i = ipforme(f, addr); return i == 0 || i == Runi; } @@ -204,9 +206,9 @@ ip4me(Fs *f, uchar ip4[4]) { uchar addr[IPaddrlen]; - v4tov6(addr, ip4); - if(ipismulticast(addr)) + if(isv4mcast(ip4)) return 0; + v4tov6(addr, ip4); return ipforme(f, addr) == Runi; } @@ -218,7 +220,7 @@ icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp) uchar ia[IPv4addrlen]; p = (Icmp *)bp->rp; - if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src)) + if(isv4mcast(p->dst) || !ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src)) return; netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n", @@ -249,7 +251,7 @@ icmpunreachable(Fs *f, Ipifc *ifc, Block *bp, 
int code, int seq) uchar ia[IPv4addrlen]; p = (Icmp *)bp->rp; - if(!ip4reply(f, p->src)) + if(isv4mcast(p->dst) || !ip4reply(f, p->src)) return; if(ifc == nil){ @@ -302,21 +304,43 @@ icmpcantfrag(Fs *f, Block *bp, int mtu) static void goticmpkt(Proto *icmp, Block *bp) { - ushort recid; uchar dst[IPaddrlen], src[IPaddrlen]; + ushort recid; Conv **c, *s; + Iphash *iph; Icmp *p; - - p = (Icmp *) bp->rp; + + p = (Icmp *)bp->rp; v4tov6(dst, p->dst); v4tov6(src, p->src); recid = nhgets(p->icmpid); + qlock(icmp); + iph = iphtlook(&((Icmppriv*)icmp->priv)->ht, src, recid, dst, recid); + if(iph != nil){ + Translation *q; + int hop = p->ttl; + + if(hop <= 1 || (q = transbackward(icmp, iph)) == nil) + goto raise; + memmove(p->dst, q->forward.raddr+IPv4off, IPv4addrlen); + hnputs_csum(p->icmpid, q->forward.rport, p->cksum); + + /* only use route-hint when from original destination */ + if(memcmp(p->src, q->forward.laddr+IPv4off, IPv4addrlen) != 0) + q = nil; + qunlock(icmp); + + ipoput4(icmp->f, bp, 1, hop - 1, p->tos, q); + return; + } for(c = icmp->conv; (s = *c) != nil; c++){ if(s->lport == recid) if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0) qpass(s->rq, copyblock(bp, blocklen(bp))); } +raise: + qunlock(icmp); freeblist(bp); } @@ -404,31 +428,6 @@ icmpiput(Proto *icmp, Ipifc*, Block *bp) ipriv->out[EchoReply]++; ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); break; - case Unreachable: - if(p->code >= nelem(unreachcode)) { - snprint(m2, sizeof m2, "unreachable %V -> %V code %d", - p->src, p->dst, p->code); - msg = m2; - } else - msg = unreachcode[p->code]; - - Advise: - bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; - if(BLEN(bp) < MinAdvise){ - ipriv->stats[LenErrs]++; - goto raise; - } - p = (Icmp *)bp->rp; - if((nhgets(p->frag) & IP_FO) == 0){ - pr = Fsrcvpcolx(icmp->f, p->proto); - if(pr != nil && pr->advise != nil) { - (*pr->advise)(pr, bp, msg); - return; - } - } - bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; - goticmpkt(icmp, bp); - break; case TimeExceed: if(p->code == 0){ 
snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src); @@ -436,29 +435,117 @@ icmpiput(Proto *icmp, Ipifc*, Block *bp) } goticmpkt(icmp, bp); break; + case Unreachable: + if(p->code >= nelem(unreachcode)) { + snprint(m2, sizeof m2, "unreachable %V -> %V code %d", + p->src, p->dst, p->code); + msg = m2; + } else + msg = unreachcode[p->code]; + Advise: + bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; + if(BLEN(bp) < MinAdvise){ + ipriv->stats[LenErrs]++; + goto raise; + } + p = (Icmp *)bp->rp; + if(p->vihl == (IP_VER4|IP_HLEN4) /* advise() does not expect options */ + && (nhgets(p->frag) & IP_FO) == 0 /* first fragment */ + && ipcsum(&p->vihl) == 0){ + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + netlog(icmp->f, Logicmp, "advising %s!%V -> %V: %s\n", pr->name, p->src, p->dst, msg); + (*pr->advise)(pr, bp, msg); + return; + } + } + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + /* wet floor */ default: goticmpkt(icmp, bp); break; } return; - raise: freeblist(bp); } +/* + * called from protocol advice handlers when the advice + * is actually for someone we source translate (ip4). + * the caller has fixed up the ip address and ports + * in the inner header, so we just restore the outer + * ip/icmp headers, recalculating icmp checksum + * and send the advice to ip4. 
+ */ +void +icmpproxyadvice(Fs *f, Block *bp, uchar *ip4) +{ + Icmp *p; + int hop; + + /* inner header */ + p = (Icmp *) bp->rp; + if(p->vihl != (IP_VER4|IP_HLEN4)) + goto drop; + if(ipcsum(&p->vihl) != 0) + goto drop; + + /* outer header */ + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + p = (Icmp *) bp->rp; + if(p->vihl != (IP_VER4|(ICMP_IPSIZE>>2))) + goto drop; + + hop = p->ttl; + if(hop <= 1) + goto drop; + + netlog(f, Logicmp|Logtrans, "proxying icmp advice from %V to %V->%V\n", + p->src, p->dst, ip4); + memmove(p->dst, ip4, IPv4addrlen); + + /* recalculate ICMP checksum */ + memset(p->cksum, 0, sizeof(p->cksum)); + hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + + ipoput4(f, bp, 1, hop - 1, p->tos, nil); + return; +drop: + freeblist(bp); +} + static void icmpadvise(Proto *icmp, Block *bp, char *msg) { - ushort recid; uchar dst[IPaddrlen], src[IPaddrlen]; + ushort recid; Conv **c, *s; Icmp *p; + Iphash *iph; p = (Icmp *) bp->rp; v4tov6(dst, p->dst); v4tov6(src, p->src); recid = nhgets(p->icmpid); + qlock(icmp); + iph = iphtlook(&((Icmppriv*)icmp->priv)->ht, dst, recid, src, recid); + if(iph != nil){ + Translation *q; + + if((q = transbackward(icmp, iph)) == nil) + goto raise; + + hnputs_csum(p->src+0, nhgets(q->forward.raddr+IPv4off+0), p->ipcksum); + hnputs_csum(p->src+2, nhgets(q->forward.raddr+IPv4off+2), p->ipcksum); + + hnputs_csum(p->icmpid, q->forward.rport, p->cksum); + qunlock(icmp); + + icmpproxyadvice(icmp->f, bp, p->src); + return; + } for(c = icmp->conv; (s = *c) != nil; c++){ if(s->lport == recid) if(ipcmp(s->laddr, src) == 0) @@ -470,9 +557,38 @@ icmpadvise(Proto *icmp, Block *bp, char *msg) break; } } +raise: + qunlock(icmp); freeblist(bp); } +static Block* +icmpforward(Proto *icmp, Block *bp, Route *r) +{ + uchar da[IPaddrlen], sa[IPaddrlen]; + ushort id; + Icmp *p; + Translation *q; + + p = (Icmp*)(bp->rp); + v4tov6(sa, p->src); + v4tov6(da, p->dst); + id = nhgets(p->icmpid); + + qlock(icmp); + q = transforward(icmp, 
&((Icmppriv*)icmp->priv)->ht, sa, id, da, id, r); + if(q == nil){ + qunlock(icmp); + freeblist(bp); + return nil; + } + memmove(p->src, q->backward.laddr+IPv4off, IPv4addrlen); + hnputs_csum(p->icmpid, q->backward.lport, p->cksum); + qunlock(icmp); + + return bp; +} + static int icmpstats(Proto *icmp, char *buf, int len) { @@ -511,6 +627,7 @@ icmpinit(Fs *fs) icmp->stats = icmpstats; icmp->ctl = nil; icmp->advise = icmpadvise; + icmp->forward = icmpforward; icmp->gc = nil; icmp->ipproto = IP_ICMPPROTO; icmp->nc = 128; diff --git a/sys/src/9/ip/icmp6.c b/sys/src/9/ip/icmp6.c index a4b4b50c8..5db8ef11b 100644 --- a/sys/src/9/ip/icmp6.c +++ b/sys/src/9/ip/icmp6.c @@ -711,9 +711,8 @@ icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp) goto raise; } p = (IPICMP *)bp->rp; - /* get rid of fragment header if this is the first fragment */ - if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){ + if((p->vcf[0] & 0xF0) == IP_VER6 && p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){ Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR); if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */ p->proto = fh->nexthdr; @@ -725,9 +724,10 @@ icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp) bp->rp -= IP6HDR; } } - if(p->proto != FH){ + if((p->vcf[0] & 0xF0) == IP_VER6 && p->proto != FH){ pr = Fsrcvpcolx(icmp->f, p->proto); if(pr != nil && pr->advise != nil) { + netlog(icmp->f, Logicmp, "advising %s!%I -> %I: %s\n", pr->name, p->src, p->dst, msg); (*pr->advise)(pr, bp, msg); return; } diff --git a/sys/src/9/ip/il.c b/sys/src/9/ip/il.c index 1ee07e863..bf72ec838 100644 --- a/sys/src/9/ip/il.c +++ b/sys/src/9/ip/il.c @@ -308,8 +308,8 @@ illocalclose(Conv *c) ic = (Ilcb*)c->ptcl; ic->state = Ilclosed; iphtrem(&ipriv->ht, c); - ipmove(c->laddr, IPnoaddr); c->lport = 0; + ipmove(c->laddr, IPnoaddr); } static void @@ -544,6 +544,7 @@ iliput(Proto *il, Ipifc*, Block *bp) uchar laddr[IPaddrlen]; ushort sp, dp, csum; int plen, illen; + Iphash *iph; Conv *new, *s; 
Ilpriv *ipriv; @@ -584,14 +585,14 @@ iliput(Proto *il, Ipifc*, Block *bp) } qlock(il); - s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp); - if(s == nil){ + iph = iphtlook(&ipriv->ht, raddr, dp, laddr, sp); + if(iph == nil){ if(ih->iltype == Ilsync) ilreject(il->f, ih); /* no listener */ qunlock(il); goto raise; } - + s = iphconv(iph); ic = (Ilcb*)s->ptcl; if(ic->state == Illistening){ if(ih->iltype != Ilsync){ diff --git a/sys/src/9/ip/ip.c b/sys/src/9/ip/ip.c index 284ef0b77..de8b20a0d 100644 --- a/sys/src/9/ip/ip.c +++ b/sys/src/9/ip/ip.c @@ -252,7 +252,7 @@ free: void ipiput4(Fs *f, Ipifc *ifc, Block *bp) { - int hl, len, hop, tos; + int hl, len, hop; uchar v6dst[IPaddrlen]; ushort frag; Ip4hdr *h; @@ -327,14 +327,42 @@ ipiput4(Fs *f, Ipifc *ifc, Block *bp) /* don't forward if packet has timed out */ hop = h->ttl; - if(hop < 1) { + if(hop <= 1) { ip->stats[InHdrErrors]++; icmpttlexceeded(f, ifc, bp); goto drop; } - /* reassemble if the interface expects it */ - if(nifc->reassemble){ + if(r->type & Rtrans) { + p = Fsrcvpcolx(f, h->proto); + if(p == nil || p->forward == nil){ + ip->stats[OutDiscards]++; + goto drop; + } + + if(hl > IP4HDR) { + hl -= IP4HDR; + len -= hl; + bp->rp += hl; + memmove(bp->rp, h, IP4HDR); + h = (Ip4hdr*)bp->rp; + h->vihl = IP_VER4|IP_HLEN4; + hnputs(h->length, len); + } + + frag = nhgets(h->frag); + if(frag & (IP_MF|IP_FO)) { + bp = ip4reassemble(ip, frag, bp); + if(bp == nil) + return; + } + + bp = (*p->forward)(p, bp, r); + if(bp == nil) + return; + h = (Ip4hdr*)bp->rp; + } else if(nifc->reassemble) { + /* reassemble as the interface expects it */ frag = nhgets(h->frag); if(frag & (IP_MF|IP_FO)) { bp = ip4reassemble(ip, frag, bp); @@ -345,9 +373,7 @@ ipiput4(Fs *f, Ipifc *ifc, Block *bp) } ip->stats[ForwDatagrams]++; - tos = h->tos; - hop = h->ttl; - ipoput4(f, bp, 1, hop - 1, tos, &rh); + ipoput4(f, bp, 1, hop - 1, h->tos, &rh); return; } diff --git a/sys/src/9/ip/ip.h b/sys/src/9/ip/ip.h index 0635e1f0e..ca77043cd 100644 --- 
a/sys/src/9/ip/ip.h +++ b/sys/src/9/ip/ip.h @@ -22,6 +22,7 @@ typedef struct Arpent Arpent; typedef struct Arp Arp; typedef struct Route Route; typedef struct Routehint Routehint; +typedef struct Translation Translation; typedef struct Routerparams Routerparams; typedef struct Hostparams Hostparams; @@ -177,6 +178,71 @@ struct Routehint Arpent *a; /* last arp entry used */ }; +/* + * hash table for 2 ip addresses + 2 ports + */ +enum +{ + Nipht= 521, /* convenient prime */ + + IPmatchexact= 0, /* match on 4 tuple */ + IPmatchany, /* *!* */ + IPmatchport, /* *!port */ + IPmatchaddr, /* addr!* */ + IPmatchpa, /* addr!port */ +}; + +struct Iphash +{ + Iphash *nextiphash; + + uchar trans; /* 0 = conv, 1 = forward, 2 = backward */ + uchar match; + ushort lport; /* local port number */ + ushort rport; /* remote port number */ + uchar laddr[IPaddrlen]; /* local IP address */ + uchar raddr[IPaddrlen]; /* remote IP address */ +}; + +struct Ipht +{ + Lock; + Iphash *tab[Nipht]; +}; + +void iphtadd(Ipht*, Iphash*); +void iphtrem(Ipht*, Iphash*); +Iphash *iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp); + +/* + * NAT entry. + * + * This holds the 5 tuple as two Iphashes. + * The "forward" hash matches the packets from + * the source that need to be translated and + * "backward" matches the packets coming back + * from the destination. 
+ */ +struct Translation +{ + Translation *next; + Translation **link; + + ulong time; + + Iphash forward; +#define iphforward(h) ((Translation*)((char*)(h) - (char*)&((Translation*)0)->forward)) + + Iphash backward; +#define iphbackward(h) ((Translation*)((char*)(h) - (char*)&((Translation*)0)->backward)) + + /* used for forwarding to the source */ + Routehint; +}; + +Translation *transforward(Proto *p, Ipht *ht, uchar *sa, int sp, uchar *da, int dp, Route *r); +Translation *transbackward(Proto *p, Iphash *iph); + /* * one per conversation directory */ @@ -187,16 +253,15 @@ struct Conv int x; /* conversation index */ Proto* p; - int restricted; /* remote port is restricted */ - int ignoreadvice; /* don't terminate connection on icmp errors */ uint ttl; /* max time to live */ uint tos; /* type of service */ + uchar restricted; /* remote port is restricted */ + uchar ignoreadvice; /* don't terminate connection on icmp errors */ uchar ipversion; - uchar laddr[IPaddrlen]; /* local IP address */ - uchar raddr[IPaddrlen]; /* remote IP address */ - ushort lport; /* local port number */ - ushort rport; /* remote port number */ + + Iphash; +#define iphconv(h) ((Conv*)((char*)(h) - (char*)&((Conv*)0)->Iphash)) char *owner; /* protections */ int perm; @@ -206,7 +271,6 @@ struct Conv /* udp specific */ int headers; /* data src/dst headers in udp */ - int reliable; /* true if reliable udp */ Conv* incall; /* calls waiting to be listened for */ Conv* next; @@ -351,34 +415,6 @@ struct Ipmulti Ipmulti *next; }; -/* - * hash table for 2 ip addresses + 2 ports - */ -enum -{ - Nipht= 521, /* convenient prime */ - - IPmatchexact= 0, /* match on 4 tuple */ - IPmatchany, /* *!* */ - IPmatchport, /* *!port */ - IPmatchaddr, /* addr!* */ - IPmatchpa, /* addr!port */ -}; -struct Iphash -{ - Iphash *next; - Conv *c; - int match; -}; -struct Ipht -{ - Lock; - Iphash *tab[Nipht]; -}; -void iphtadd(Ipht*, Conv*); -void iphtrem(Ipht*, Conv*); -Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, 
uchar *da, ushort dp); - /* * one per multiplexed protocol */ @@ -412,9 +448,15 @@ struct Proto Qid qid; /* qid for protocol directory */ ushort nextrport; + /* network address translation */ + Translation* translations; + Block* (*forward)(Proto*, Block*, Route*); + void *priv; }; +int unusedlport(Proto *p); + /* * one per IP protocol stack @@ -489,6 +531,7 @@ enum Logrudpmsg= 1<<16, Logesp= 1<<17, Logtcpwin= 1<<18, + Logtrans= 1<<19, }; void netloginit(Fs*); @@ -522,7 +565,9 @@ enum Rbcast= (1<<4), /* a broadcast self address */ Rmulti= (1<<5), /* a multicast self address */ Rproxy= (1<<6), /* this route should be proxied */ - Rsrc= (1<<7), /* source specific route */ + Rtrans= (1<<7), /* this route translates source address (NAT) */ + + Rsrc= (1<<8), /* source specific route */ }; struct RouteTree @@ -533,7 +578,7 @@ struct RouteTree Ipifc *ifc; uchar ifcid; /* must match ifc->id */ uchar depth; - uchar type; + ushort type; char tag[4]; int ref; }; @@ -641,10 +686,13 @@ extern int isv4(uchar*); extern void v4tov6(uchar *v6, uchar *v4); extern int v6tov4(uchar *v4, uchar *v6); extern int eipfmt(Fmt*); +extern int ipismulticast(uchar *ip); extern int convipvers(Conv *c); +extern void hnputs_csum(void *p, ushort v, uchar *pcsum); #define ipmove(x, y) memmove(x, y, IPaddrlen) #define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) +#define isv4mcast(ip4) ((ip4)[0] >= 0xe0 && (ip4)[0] < 0xf0) extern uchar IPv4bcast[IPaddrlen]; extern uchar IPv4bcastobs[IPaddrlen]; @@ -670,7 +718,6 @@ extern Medium* ipfindmedium(char *name); extern void addipmedium(Medium *med); extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip, Routehint *rh); extern int ipforme(Fs*, uchar *addr); -extern int ipismulticast(uchar *ip); extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type); extern Ipifc* findipifcstr(Fs *f, char *s); extern void findlocalip(Fs*, uchar *local, uchar *remote); @@ -694,6 +741,8 @@ extern void icmpnohost(Fs*, 
Ipifc*, Block*); extern void icmpnoconv(Fs*, Block*); extern void icmpcantfrag(Fs*, Block*, int); extern void icmpttlexceeded(Fs*, Ipifc*, Block*); +extern void icmpproxyadvice(Fs *, Block*, uchar*); + extern ushort ipcsum(uchar*); extern void ipiput4(Fs*, Ipifc*, Block*); extern void ipiput6(Fs*, Ipifc*, Block*); diff --git a/sys/src/9/ip/ipaux.c b/sys/src/9/ip/ipaux.c index 549df1f6e..50e4e6cc1 100644 --- a/sys/src/9/ip/ipaux.c +++ b/sys/src/9/ip/ipaux.c @@ -203,7 +203,6 @@ ipv62smcast(uchar *smcast, uchar *a) smcast[15] = a[15]; } - /* * parse a hex mac address */ @@ -233,140 +232,23 @@ parsemac(uchar *to, char *from, int len) } /* - * hashing tcp, udp, ... connections + * return multicast version if any */ -ulong -iphash(uchar *sa, ushort sp, uchar *da, ushort dp) +int +ipismulticast(uchar *ip) { - return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht; -} - -void -iphtadd(Ipht *ht, Conv *c) -{ - ulong hv; - Iphash *h; - - hv = iphash(c->raddr, c->rport, c->laddr, c->lport); - h = smalloc(sizeof(*h)); - if(ipcmp(c->raddr, IPnoaddr) != 0) - h->match = IPmatchexact; - else { - if(ipcmp(c->laddr, IPnoaddr) != 0){ - if(c->lport == 0) - h->match = IPmatchaddr; - else - h->match = IPmatchpa; - } else { - if(c->lport == 0) - h->match = IPmatchany; - else - h->match = IPmatchport; - } + if(isv4(ip)){ + if(isv4mcast(&ip[IPv4off])) + return V4; } - h->c = c; - - lock(ht); - h->next = ht->tab[hv]; - ht->tab[hv] = h; - unlock(ht); + else if(isv6mcast(ip)) + return V6; + return 0; } -void -iphtrem(Ipht *ht, Conv *c) -{ - ulong hv; - Iphash **l, *h; - - hv = iphash(c->raddr, c->rport, c->laddr, c->lport); - lock(ht); - for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->next) - if((*l)->c == c){ - h = *l; - (*l) = h->next; - free(h); - break; - } - unlock(ht); -} - -/* look for a matching conversation with the following precedence - * connected && raddr,rport,laddr,lport - * announced && laddr,lport - * announced && *,lport - * announced && laddr,* - * 
announced && *,* +/* + * return ip version of a connection */ -Conv* -iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) -{ - ulong hv; - Iphash *h; - Conv *c; - - /* exact 4 pair match (connection) */ - hv = iphash(sa, sp, da, dp); - lock(ht); - for(h = ht->tab[hv]; h != nil; h = h->next){ - if(h->match != IPmatchexact) - continue; - c = h->c; - if(sp == c->rport && dp == c->lport - && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; - } - } - - /* match local address and port */ - hv = iphash(IPnoaddr, 0, da, dp); - for(h = ht->tab[hv]; h != nil; h = h->next){ - if(h->match != IPmatchpa) - continue; - c = h->c; - if(dp == c->lport && ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; - } - } - - /* match just port */ - hv = iphash(IPnoaddr, 0, IPnoaddr, dp); - for(h = ht->tab[hv]; h != nil; h = h->next){ - if(h->match != IPmatchport) - continue; - c = h->c; - if(dp == c->lport){ - unlock(ht); - return c; - } - } - - /* match local address */ - hv = iphash(IPnoaddr, 0, da, 0); - for(h = ht->tab[hv]; h != nil; h = h->next){ - if(h->match != IPmatchaddr) - continue; - c = h->c; - if(ipcmp(da, c->laddr) == 0){ - unlock(ht); - return c; - } - } - - /* look for something that matches anything */ - hv = iphash(IPnoaddr, 0, IPnoaddr, 0); - for(h = ht->tab[hv]; h != nil; h = h->next){ - if(h->match != IPmatchany) - continue; - c = h->c; - unlock(ht); - return c; - } - unlock(ht); - return nil; -} - int convipvers(Conv *c) { @@ -375,3 +257,313 @@ convipvers(Conv *c) else return V6; } + +/* + * hashing tcp, udp, ... 
connections + */ +static ulong +iphash(uchar *sa, ushort sp, uchar *da, ushort dp) +{ + return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht; +} + +void +iphtadd(Ipht *ht, Iphash *h) +{ + ulong hv; + + if(ipcmp(h->raddr, IPnoaddr) != 0) + h->match = IPmatchexact; + else { + if(ipcmp(h->laddr, IPnoaddr) != 0){ + if(h->lport == 0) + h->match = IPmatchaddr; + else + h->match = IPmatchpa; + } else { + if(h->lport == 0) + h->match = IPmatchany; + else + h->match = IPmatchport; + } + } + lock(ht); + hv = iphash(h->raddr, h->rport, h->laddr, h->lport); + h->nextiphash = ht->tab[hv]; + ht->tab[hv] = h; + unlock(ht); +} + +void +iphtrem(Ipht *ht, Iphash *h) +{ + ulong hv; + Iphash **l; + + lock(ht); + hv = iphash(h->raddr, h->rport, h->laddr, h->lport); + for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->nextiphash) + if(*l == h){ + (*l) = h->nextiphash; + h->nextiphash = nil; + break; + } + unlock(ht); +} + +/* look for a matching iphash with the following precedence + * raddr,rport,laddr,lport + * laddr,lport + * *,lport + * laddr,* + * *,* + */ +Iphash* +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +{ + ulong hv; + Iphash *h; + + lock(ht); + /* exact 4 pair match (connection) */ + hv = iphash(sa, sp, da, dp); + for(h = ht->tab[hv]; h != nil; h = h->nextiphash){ + if(h->match != IPmatchexact) + continue; + if(sp == h->rport && dp == h->lport + && ipcmp(sa, h->raddr) == 0 && ipcmp(da, h->laddr) == 0){ + unlock(ht); + return h; + } + } + + /* match local address and port */ + hv = iphash(IPnoaddr, 0, da, dp); + for(h = ht->tab[hv]; h != nil; h = h->nextiphash){ + if(h->match != IPmatchpa) + continue; + if(dp == h->lport && ipcmp(da, h->laddr) == 0){ + unlock(ht); + return h; + } + } + + /* match just port */ + hv = iphash(IPnoaddr, 0, IPnoaddr, dp); + for(h = ht->tab[hv]; h != nil; h = h->nextiphash){ + if(h->match != IPmatchport) + continue; + if(dp == h->lport){ + unlock(ht); + return h; + } + } + + /* match local address */ + hv 
= iphash(IPnoaddr, 0, da, 0); + for(h = ht->tab[hv]; h != nil; h = h->nextiphash){ + if(h->match != IPmatchaddr) + continue; + if(ipcmp(da, h->laddr) == 0){ + unlock(ht); + return h; + } + } + + /* look for something that matches anything */ + hv = iphash(IPnoaddr, 0, IPnoaddr, 0); + for(h = ht->tab[hv]; h != nil; h = h->nextiphash){ + if(h->match != IPmatchany) + continue; + unlock(ht); + return h; + } + unlock(ht); + return nil; +} + +/* + * Move entry to front of Proto.translations + * and update the timestamp. + * + * Proto is locked. + */ +static Translation* +transupdate(Proto *p, Translation *q) +{ + q->time = NOW; + + /* unlink */ + if(q->link != nil && (*q->link = q->next) != nil) + q->next->link = q->link; + + /* link to front */ + if((q->next = p->translations) != nil) + q->next->link = &q->next; + p->translations = q; + q->link = &p->translations; + + return q; +} + +/* + * Called with the 4-tuple (sa,sp,da,dp) + * that should be source translated, + * returning the translation. + * + * Proto is locked. + */ +Translation* +transforward(Proto *p, Ipht *ht, uchar *sa, int sp, uchar *da, int dp, Route *r) +{ + uchar ia[IPaddrlen]; + Routehint rh; + Translation *q; + Iphash *iph; + Ipifc *ifc; + int lport; + ulong now; + int num; + + /* Translation already exists? */ + iph = iphtlook(ht, sa, sp, da, dp); + if(iph != nil) { + if(iph->trans != 1) + return nil; + return transupdate(p, iphforward(iph)); + } + + /* Bad source address? */ + if(ipismulticast(sa) || ipforme(p->f, sa) != 0){ + netlog(p->f, Logtrans, "trans: bad source address: %s!%I!%d -> %I!%d\n", + p->name, sa, sp, da, dp); + return nil; + } + + /* Bad forward route? 
*/ + if(r == nil || (ifc = r->ifc) == nil){ + netlog(p->f, Logtrans, "trans: no forward route: %s!%I!%d -> %I!%d\n", + p->name, sa, sp, da, dp); + return nil; + } + + /* Find a source address on the destination interface */ + rlock(ifc); + memmove(ia, v4prefix, IPv4off); + if(!ipv4local(ifc, ia+IPv4off, 0, (r->type & (Rifc|Runi|Rbcast|Rmulti))? da+IPv4off: r->v4.gate)){ + runlock(ifc); + netlog(p->f, Logtrans, "trans: no source ip: %s!%I!%d -> %I!%d\n", + p->name, sa, sp, da, dp); + return nil; + } + runlock(ifc); + + /* Check backward route */ + rh.a = nil; + rh.r = nil; + if(ipismulticast(da)) + r = v4lookup(p->f, sa+IPv4off, ia+IPv4off, nil); + else + r = v4lookup(p->f, sa+IPv4off, da+IPv4off, &rh); + if(r == nil || (r->ifc == ifc && !ifc->reflect)){ + netlog(p->f, Logtrans, "trans: bad backward route: %s!%I!%d <- %I <- %I!%d\n", + p->name, sa, sp, ia, da, dp); + return nil; + } + + /* Find local port */ + lport = unusedlport(p); + if(lport <= 0){ + netlog(p->f, Logtrans, "trans: no local port: %s!%I!%d <- %I <- %I!%d\n", + p->name, sa, sp, ia, da, dp); + return nil; + } + + /* Reuse expired entries */ + num = 0; + now = NOW; + for(q = p->translations; q != nil; q = q->next) { + if(++num >= 1000 || (now - q->time) >= 5*60*1000){ + netlog(p->f, Logtrans, "trans: removing %s!%I!%d -> %I!%d -> %I!%d\n", + p->name, + q->forward.raddr, q->forward.rport, + q->backward.laddr, q->backward.lport, + q->forward.laddr, q->forward.lport); + + iphtrem(ht, &q->forward); + iphtrem(ht, &q->backward); + break; + } + } + if(q == nil){ + q = malloc(sizeof(*q)); + if(q == nil) + return nil; + q->link = nil; + } + + /* Match what needs to be forwarded */ + q->forward.trans = 1; + q->forward.lport = dp; + q->forward.rport = sp; + ipmove(q->forward.laddr, da); + ipmove(q->forward.raddr, sa); + + /* Match what comes back to us */ + q->backward.trans = 2; + q->backward.lport = lport; + ipmove(q->backward.laddr, ia); + if(p->ipproto == 1 || ipismulticast(da)){ + q->backward.rport = 0; + 
ipmove(q->backward.raddr, IPnoaddr); + } else { + q->backward.rport = dp; + ipmove(q->backward.raddr, da); + } + memmove(&q->Routehint, &rh, sizeof(rh)); + + netlog(p->f, Logtrans, "trans: adding %s!%I!%d -> %I!%d -> %I!%d\n", + p->name, + q->forward.raddr, q->forward.rport, + q->backward.laddr, q->backward.lport, + q->forward.laddr, q->forward.lport); + + iphtadd(ht, &q->forward); + iphtadd(ht, &q->backward); + + return transupdate(p, q); +} + +/* + * Check if backward translation is valid and + * update timestamp. + * + * Proto is locked. + */ +Translation* +transbackward(Proto *p, Iphash *iph) +{ + if(iph == nil || iph->trans != 2) + return nil; + + return transupdate(p, iphbackward(iph)); +} + +/* + * Checksum adjusting hnputs() + */ +void +hnputs_csum(void *p, ushort v, uchar *pcsum) +{ + ulong csum; + + assert((((uchar*)p - pcsum) & 1) == 0); + + csum = nhgets(pcsum)^0xFFFF; + csum += nhgets(p)^0xFFFF; + csum += v; + hnputs(p, v); + while(v = csum >> 16) + csum = (csum & 0xFFFF) + v; + hnputs(pcsum, csum^0xFFFF); +} diff --git a/sys/src/9/ip/ipifc.c b/sys/src/9/ip/ipifc.c index 4f6272bf3..b6fbcc73f 100644 --- a/sys/src/9/ip/ipifc.c +++ b/sys/src/9/ip/ipifc.c @@ -1435,21 +1435,6 @@ ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip) return ipremoteonifc(ifc, ip) != nil; } -/* - * return multicast version if any - */ -int -ipismulticast(uchar *ip) -{ - if(isv4(ip)){ - if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0) - return V4; - } - else if(ip[0] == 0xff) - return V6; - return 0; -} - /* * add a multicast address to an interface. 
*/ diff --git a/sys/src/9/ip/iproute.c b/sys/src/9/ip/iproute.c index 72b9fe89a..4e3d57e44 100644 --- a/sys/src/9/ip/iproute.c +++ b/sys/src/9/ip/iproute.c @@ -875,6 +875,9 @@ parseroutetype(char *p) case 'p': if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1; break; + case 't': + if(((type ^= Rtrans) & Rtrans) != Rtrans) return -1; + break; case '\0': return type; } @@ -900,6 +903,10 @@ routetype(int type, char p[8]) if(type & Rptpt) *p++ = 'p'; + + if(type & Rtrans) + *p++ = 't'; + *p = 0; } diff --git a/sys/src/9/ip/ipv6.c b/sys/src/9/ip/ipv6.c index 0ad8d8e86..bfe652d47 100644 --- a/sys/src/9/ip/ipv6.c +++ b/sys/src/9/ip/ipv6.c @@ -278,7 +278,7 @@ ipiput6(Fs *f, Ipifc *ifc, Block *bp) /* don't forward if packet has timed out */ hop = h->ttl; - if(hop < 1) { + if(hop <= 1) { ip->stats[InHdrErrors]++; icmpttlexceeded6(f, ifc, bp); goto drop; @@ -292,8 +292,7 @@ ipiput6(Fs *f, Ipifc *ifc, Block *bp) ip->stats[ForwDatagrams]++; h = (Ip6hdr*)bp->rp; tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2; - hop = h->ttl; - ipoput6(f, bp, 1, hop-1, tos, &rh); + ipoput6(f, bp, 1, hop - 1, tos, &rh); return; } diff --git a/sys/src/9/ip/netlog.c b/sys/src/9/ip/netlog.c index f07c3b961..ea155fe8f 100644 --- a/sys/src/9/ip/netlog.c +++ b/sys/src/9/ip/netlog.c @@ -51,6 +51,7 @@ static Netlogflag flags[] = { "udpmsg", Logudp|Logudpmsg, }, { "ipmsg", Logip|Logipmsg, }, { "esp", Logesp, }, + { "trans", Logtrans, }, { nil, 0, }, }; diff --git a/sys/src/9/ip/rudp.c b/sys/src/9/ip/rudp.c index c374f6abc..18842479f 100644 --- a/sys/src/9/ip/rudp.c +++ b/sys/src/9/ip/rudp.c @@ -220,9 +220,10 @@ rudpconnect(Conv *c, char **argv, int argc) rudpstartackproc(c->p); e = Fsstdconnect(c, argv, argc); Fsconnected(c, e); + if(e != nil) + return e; iphtadd(&upriv->ht, c); - - return e; + return nil; } @@ -256,7 +257,6 @@ rudpannounce(Conv *c, char** argv, int argc) return e; Fsconnected(c, nil); iphtadd(&upriv->ht, c); - return nil; } @@ -289,10 +289,11 @@ rudpclose(Conv *c) qclose(c->rq); 
qclose(c->wq); qclose(c->eq); - ipmove(c->laddr, IPnoaddr); - ipmove(c->raddr, IPnoaddr); + c->lport = 0; + ipmove(c->laddr, IPnoaddr); c->rport = 0; + ipmove(c->raddr, IPnoaddr); ucb->headers = 0; ucb->randdrop = 0; @@ -460,11 +461,12 @@ rudpkick(void *x) void rudpiput(Proto *rudp, Ipifc *ifc, Block *bp) { - int len, olen, ottl; + int len, olen; Udphdr *uh; + Iphash *iph; Conv *c; Rudpcb *ucb; - uchar raddr[IPaddrlen], laddr[IPaddrlen]; + uchar raddr[IPaddrlen], laddr[IPaddrlen], ottl; ushort rport, lport; Rudppriv *upriv; Fs *f; @@ -503,9 +505,8 @@ rudpiput(Proto *rudp, Ipifc *ifc, Block *bp) } qlock(rudp); - - c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); - if(c == nil){ + iph = iphtlook(&upriv->ht, raddr, rport, laddr, lport); + if(iph == nil){ /* no conversation found */ upriv->ustats.rudpNoPorts++; qunlock(rudp); @@ -517,6 +518,7 @@ rudpiput(Proto *rudp, Ipifc *ifc, Block *bp) freeblist(bp); return; } + c = iphconv(iph); ucb = (Rudpcb*)c->ptcl; qlock(ucb); qunlock(rudp); diff --git a/sys/src/9/ip/tcp.c b/sys/src/9/ip/tcp.c index e4e6fa95b..d2a83a747 100644 --- a/sys/src/9/ip/tcp.c +++ b/sys/src/9/ip/tcp.c @@ -126,7 +126,7 @@ struct Tcp4hdr uchar length[2]; /* packet length */ uchar id[2]; /* Identification */ uchar frag[2]; /* Fragment information */ - uchar Unused; + uchar ttl; uchar proto; uchar tcplen[2]; uchar tcpsrc[4]; @@ -1814,9 +1814,7 @@ tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) } tcpsetstate(new, Established); - iphtadd(&tpriv->ht, new); - return new; } @@ -2068,10 +2066,11 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) Tcp seg; Tcp4hdr *h4; Tcp6hdr *h6; - int hdrlen; Tcpctl *tcb; - ushort length, csum; + int hdrlen; + ushort length; uchar source[IPaddrlen], dest[IPaddrlen]; + Iphash *iph; Conv *s; Fs *f; Tcppriv *tpriv; @@ -2087,21 +2086,32 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) h6 = (Tcp6hdr*)(bp->rp); if((h4->vihl&0xF0)==IP_VER4) { + int ttl = h4->ttl; + version = V4; length = nhgets(h4->length); + if(length < 
TCP4_PKT){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp len\n"); + freeblist(bp); + return; + } + length -= TCP4_PKT; v4tov6(dest, h4->tcpdst); v4tov6(source, h4->tcpsrc); - h4->Unused = 0; - hnputs(h4->tcplen, length-TCP4_PKT); - if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && - ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) { + h4->ttl = 0; + hnputs(h4->tcplen, length); + if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) + && ptclcsum(bp, TCP4_IPLEN, length + TCP4_PKT - TCP4_IPLEN)) { tpriv->stats[CsumErrs]++; tpriv->stats[InErrs]++; netlog(f, Logtcp, "bad tcp proto cksum\n"); freeblist(bp); return; } + h4->ttl = ttl; hdrlen = ntohtcp4(&seg, &bp); if(hdrlen < 0){ @@ -2110,16 +2120,8 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) netlog(f, Logtcp, "bad tcp hdr len\n"); return; } - - /* trim the packet to the size claimed by the datagram */ - length -= hdrlen+TCP4_PKT; - bp = trimblock(bp, hdrlen+TCP4_PKT, length); - if(bp == nil){ - tpriv->stats[LenErrs]++; - tpriv->stats[InErrs]++; - netlog(f, Logtcp, "tcp len < 0 after trim\n"); - return; - } + length -= hdrlen; + hdrlen += TCP4_PKT; } else { int ttl = h6->ttl; @@ -2133,13 +2135,13 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; h6->ttl = proto; hnputl(h6->vcf, length); - if((h6->tcpcksum[0] || h6->tcpcksum[1]) && - (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) { + if((h6->tcpcksum[0] || h6->tcpcksum[1]) + && ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE) != 0) { tpriv->stats[CsumErrs]++; tpriv->stats[InErrs]++; netlog(f, Logtcp, - "bad tcpv6 proto cksum: got %#ux, computed %#ux\n", - h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum); + "bad tcpv6 proto cksum: got %#ux\n", + h6->tcpcksum[0]<<8 | h6->tcpcksum[1]); freeblist(bp); return; } @@ -2154,24 +2156,16 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) netlog(f, Logtcp, "bad tcpv6 hdr len\n"); return; } - - /* trim the packet to the size claimed by 
the datagram */ length -= hdrlen; - bp = trimblock(bp, hdrlen+TCP6_PKT, length); - if(bp == nil){ - tpriv->stats[LenErrs]++; - tpriv->stats[InErrs]++; - netlog(f, Logtcp, "tcpv6 len < 0 after trim\n"); - return; - } + hdrlen += TCP6_PKT; } /* lock protocol while searching for a conversation */ qlock(tcp); /* Look for a matching conversation */ - s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); - if(s == nil){ + iph = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); + if(iph == nil){ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n", source, seg.source, dest, seg.dest); reset: @@ -2180,6 +2174,30 @@ reset: sndrst(tcp, source, dest, length, &seg, version, "no conversation", nil); return; } + if(iph->trans){ + Translation *q; + int hop = h4->ttl; + + if(hop <= 1 || (q = transbackward(tcp, iph)) == nil) + goto reset; + hnputs_csum(h4->tcpdst+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcpcksum); + hnputs_csum(h4->tcpdst+2, nhgets(q->forward.raddr+IPv4off+2), h4->tcpcksum); + hnputs_csum(h4->tcpdport, q->forward.rport, h4->tcpcksum); + qunlock(tcp); + ipoput4(f, bp, 1, hop - 1, h4->tos, q); + return; + } + s = iphconv(iph); + + /* trim off ip and tcp headers */ + bp = trimblock(bp, hdrlen, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp bad length after header trim off\n"); + qunlock(tcp); + return; + } /* if it's a listener, look for the right flags and get a new conv */ tcb = (Tcpctl*)s->ptcl; @@ -3200,11 +3218,12 @@ tcpadvise(Proto *tcp, Block *bp, char *msg) { Tcp4hdr *h4; Tcp6hdr *h6; - Tcpctl *tcb; uchar source[IPaddrlen]; uchar dest[IPaddrlen]; ushort psource, pdest; - Conv *s, **p; + Iphash *iph; + Tcpctl *tcb; + Conv *s; h4 = (Tcp4hdr*)(bp->rp); h6 = (Tcp6hdr*)(bp->rp); @@ -3221,33 +3240,73 @@ tcpadvise(Proto *tcp, Block *bp, char *msg) pdest = nhgets(h6->tcpdport); } - /* Look for a connection */ + /* Look for a connection (source/dest reversed; this is the original packet 
we sent) */ qlock(tcp); - for(p = tcp->conv; (s = *p) != nil; p++) { - tcb = (Tcpctl*)s->ptcl; - if(s->rport == pdest) - if(s->lport == psource) - if(tcb->state != Closed) - if(ipcmp(s->raddr, dest) == 0) - if(ipcmp(s->laddr, source) == 0){ - if(s->ignoreadvice) - break; - qlock(s); - qunlock(tcp); - switch(tcb->state){ - case Syn_sent: - localclose(s, msg); - break; - } - qunlock(s); - freeblist(bp); - return; - } + iph = iphtlook(&((Tcppriv*)tcp->priv)->ht, dest, pdest, source, psource); + if(iph == nil) + goto raise; + if(iph->trans){ + Translation *q; + + if((q = transbackward(tcp, iph)) == nil) + goto raise; + + /* h4->tcplen is the ip header checksum */ + hnputs_csum(h4->tcpsrc+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcplen); + hnputs_csum(h4->tcpsrc+2, nhgets(q->forward.raddr+IPv4off+2), h4->tcplen); + + /* dont bother fixing tcp checksum, packet is most likely truncated */ + hnputs(h4->tcpsport, q->forward.rport); + qunlock(tcp); + + icmpproxyadvice(tcp->f, bp, h4->tcpsrc); + return; } + s = iphconv(iph); + if(s->ignoreadvice || s->state == Closed) + goto raise; + qlock(s); + qunlock(tcp); + tcb = (Tcpctl*)s->ptcl; + if(tcb->state == Syn_sent) + localclose(s, msg); + qunlock(s); + freeblist(bp); + return; +raise: qunlock(tcp); freeblist(bp); } +static Block* +tcpforward(Proto *tcp, Block *bp, Route *r) +{ + uchar da[IPaddrlen], sa[IPaddrlen]; + ushort dp, sp; + Tcp4hdr *h4; + Translation *q; + + h4 = (Tcp4hdr*)(bp->rp); + v4tov6(da, h4->tcpdst); + v4tov6(sa, h4->tcpsrc); + dp = nhgets(h4->tcpdport); + sp = nhgets(h4->tcpsport); + + qlock(tcp); + q = transforward(tcp, &((Tcppriv*)tcp->priv)->ht, sa, sp, da, dp, r); + if(q == nil){ + qunlock(tcp); + freeblist(bp); + return nil; + } + hnputs_csum(h4->tcpsrc+0, nhgets(q->backward.laddr+IPv4off+0), h4->tcpcksum); + hnputs_csum(h4->tcpsrc+2, nhgets(q->backward.laddr+IPv4off+2), h4->tcpcksum); + hnputs_csum(h4->tcpsport, q->backward.lport, h4->tcpcksum); + qunlock(tcp); + + return bp; +} + static char* 
tcpporthogdefensectl(char *val) { @@ -3371,6 +3430,7 @@ tcpinit(Fs *fs) tcp->close = tcpclose; tcp->rcv = tcpiput; tcp->advise = tcpadvise; + tcp->forward = tcpforward; tcp->stats = tcpstats; tcp->inuse = tcpinuse; tcp->gc = tcpgc; diff --git a/sys/src/9/ip/udp.c b/sys/src/9/ip/udp.c index 5800c7e8c..651e616df 100644 --- a/sys/src/9/ip/udp.c +++ b/sys/src/9/ip/udp.c @@ -39,7 +39,7 @@ struct Udp4hdr uchar length[2]; /* packet length */ uchar id[2]; /* Identification */ uchar frag[2]; /* Fragment information */ - uchar Unused; + uchar ttl; /* Time to live */ uchar udpproto; /* Protocol */ uchar udpplen[2]; /* Header plus data length */ uchar udpsrc[IPv4addrlen]; /* Ip source */ @@ -91,7 +91,6 @@ struct Udppriv ulong lenerr; /* short packet */ }; -void (*etherprofiler)(char *name, int qlen); void udpkick(void *x, Block *bp); /* @@ -114,7 +113,6 @@ udpconnect(Conv *c, char **argv, int argc) Fsconnected(c, e); if(e != nil) return e; - iphtadd(&upriv->ht, c); return nil; } @@ -142,7 +140,6 @@ udpannounce(Conv *c, char** argv, int argc) return e; Fsconnected(c, nil); iphtadd(&upriv->ht, c); - return nil; } @@ -166,10 +163,10 @@ udpclose(Conv *c) qclose(c->rq); qclose(c->wq); qclose(c->eq); - ipmove(c->laddr, IPnoaddr); - ipmove(c->raddr, IPnoaddr); c->lport = 0; + ipmove(c->laddr, IPnoaddr); c->rport = 0; + ipmove(c->raddr, IPnoaddr); ucb = (Udpcb*)c->ptcl; ucb->headers = 0; @@ -238,7 +235,7 @@ udpkick(void *x, Block *bp) bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ); uh4 = (Udp4hdr *)(bp->rp); ptcllen = dlen + UDP_UDPHDR_SZ; - uh4->Unused = 0; + uh4->ttl = 0; uh4->udpproto = IP_UDPPROTO; uh4->frag[0] = 0; uh4->frag[1] = 0; @@ -319,6 +316,7 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) int len; Udp4hdr *uh4; Udp6hdr *uh6; + Iphash *iph; Conv *c; Udpcb *ucb; uchar raddr[IPaddrlen], laddr[IPaddrlen]; @@ -334,14 +332,15 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) upriv->ustats.udpInDatagrams++; uh4 = (Udp4hdr*)(bp->rp); + uh6 = (Udp6hdr*)(bp->rp); version = 
((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4; /* Put back pseudo header for checksum * (remember old values for icmpnoconv()) */ switch(version) { case V4: - ottl = uh4->Unused; - uh4->Unused = 0; + ottl = uh4->ttl; + uh4->ttl = 0; len = nhgets(uh4->udplen); olen = nhgets(uh4->udpplen); hnputs(uh4->udpplen, len); @@ -360,11 +359,10 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) return; } } - uh4->Unused = ottl; + uh4->ttl = ottl; hnputs(uh4->udpplen, olen); break; case V6: - uh6 = (Udp6hdr*)(bp->rp); len = nhgets(uh6->udplen); oviclfl = nhgetl(uh6->viclfl); olen = nhgets(uh6->len); @@ -394,9 +392,8 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) } qlock(udp); - - c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); - if(c == nil){ + iph = iphtlook(&upriv->ht, raddr, rport, laddr, lport); + if(iph == nil){ /* no conversation found */ upriv->ustats.udpNoPorts++; qunlock(udp); @@ -417,6 +414,26 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) freeblist(bp); return; } + if(iph->trans){ + Translation *q; + int hop = uh4->ttl; + if(hop <= 1 || (q = transbackward(udp, iph)) == nil){ + qunlock(udp); + freeblist(bp); + return; + } + hnputs_csum(uh4->udpdst+0, nhgets(q->forward.raddr+IPv4off+0), uh4->udpcksum); + hnputs_csum(uh4->udpdst+2, nhgets(q->forward.raddr+IPv4off+2), uh4->udpcksum); + hnputs_csum(uh4->udpdport, q->forward.rport, uh4->udpcksum); + + /* only use route-hint when from original desination */ + if(memcmp(uh4->udpsrc, q->forward.laddr+IPv4off, IPv4addrlen) != 0) + q = nil; + qunlock(udp); + ipoput4(f, bp, 1, hop - 1, uh4->tos, q); + return; + } + c = iphconv(iph); ucb = (Udpcb*)c->ptcl; if(c->state == Announced){ @@ -487,7 +504,6 @@ udpiput(Proto *udp, Ipifc *ifc, Block *bp) qpass(c->rq, concatblock(bp)); } qunlock(c); - } char* @@ -517,7 +533,8 @@ udpadvise(Proto *udp, Block *bp, char *msg) Udp6hdr *h6; uchar source[IPaddrlen], dest[IPaddrlen]; ushort psource, pdest; - Conv *s, **p; + Iphash *iph; + Conv *s; h4 = (Udp4hdr*)(bp->rp); h6 = (Udp6hdr*)(bp->rp); @@ -534,28 
+551,72 @@ udpadvise(Proto *udp, Block *bp, char *msg) pdest = nhgets(h6->udpdport); } - /* Look for a connection */ + /* Look for a connection (source/dest reversed; this is the original packet we sent) */ qlock(udp); - for(p = udp->conv; (s = *p) != nil; p++) { - if(s->rport == pdest) - if(s->lport == psource) - if(ipcmp(s->raddr, dest) == 0) - if(ipcmp(s->laddr, source) == 0){ - if(s->ignoreadvice) - break; - qlock(s); - qunlock(udp); - qhangup(s->rq, msg); - qhangup(s->wq, msg); - qunlock(s); - freeblist(bp); - return; - } + iph = iphtlook(&((Udppriv*)udp->priv)->ht, dest, pdest, source, psource); + if(iph == nil) + goto raise; + if(iph->trans){ + Translation *q; + + if((q = transbackward(udp, iph)) == nil) + goto raise; + + /* h4->udpplen is the ip header checksum */ + hnputs_csum(h4->udpsrc+0, nhgets(q->forward.raddr+IPv4off+0), h4->udpplen); + hnputs_csum(h4->udpsrc+2, nhgets(q->forward.raddr+IPv4off+2), h4->udpplen); + + /* dont bother fixing udp checksum, packet is most likely truncated */ + hnputs(h4->udpsport, q->forward.rport); + qunlock(udp); + + icmpproxyadvice(udp->f, bp, h4->udpsrc); + return; } + s = iphconv(iph); + if(s->ignoreadvice) + goto raise; + qlock(s); + qunlock(udp); + qhangup(s->rq, msg); + qhangup(s->wq, msg); + qunlock(s); + freeblist(bp); + return; +raise: qunlock(udp); freeblist(bp); } +Block* +udpforward(Proto *udp, Block *bp, Route *r) +{ + uchar da[IPaddrlen], sa[IPaddrlen]; + ushort dp, sp; + Udp4hdr *uh4; + Translation *q; + + uh4 = (Udp4hdr*)(bp->rp); + v4tov6(sa, uh4->udpsrc); + v4tov6(da, uh4->udpdst); + dp = nhgets(uh4->udpdport); + sp = nhgets(uh4->udpsport); + + qlock(udp); + q = transforward(udp, &((Udppriv*)udp->priv)->ht, sa, sp, da, dp, r); + if(q == nil){ + qunlock(udp); + freeblist(bp); + return nil; + } + hnputs_csum(uh4->udpsrc+0, nhgets(q->backward.laddr+IPv4off+0), uh4->udpcksum); + hnputs_csum(uh4->udpsrc+2, nhgets(q->backward.laddr+IPv4off+2), uh4->udpcksum); + hnputs_csum(uh4->udpsport, q->backward.lport, 
uh4->udpcksum); + qunlock(udp); + + return bp; +} + int udpstats(Proto *udp, char *buf, int len) { @@ -586,6 +647,7 @@ udpinit(Fs *fs) udp->close = udpclose; udp->rcv = udpiput; udp->advise = udpadvise; + udp->forward = udpforward; udp->stats = udpstats; udp->ipproto = IP_UDPPROTO; udp->nc = Nchans;