diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2022-03-12 20:53:17 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2022-03-12 20:53:17 +0000 |
commit | d2a7d886624c56673a6d7ba7d6a7958d2be5b867 (patch) | |
tree | 483c16a36a4fcb97f66708a0d11f1e43f6fcbddf /sys/src/9/ip/tcp.c | |
parent | c14ea9fdd1521ff9322f9af71b801e016622c0cd (diff) |
devip: implement network address translation routes
This adds a new route "t"-flag that enables network address translation,
replacing the source address (and local port) of a forwarded packet with
one belonging to the outgoing interface.
The state for a translation is kept in a new Translation structure,
which contains two Iphash entries, so it can be inserted into the
per protocol 4-tuple hash table, requiring no extra lookups.
Translations have a low overhead (~200 bytes on amd64),
so we can have many of them. They get reused after 5 minutes
of inactivity or when the per protocol limit of 1000 entries
is reached (then the one with longest inactivity is reused).
The protocol needs to export a "forward" function that is responsible
for modifying the forwarded packet, and then handle translations in
its input function for iphash hits with Iphash.trans != 0.
This patch also fixes a few minor things found during development:
- Include the Iphash in the Conv structure, avoiding extra malloc
- Fix ttl exceeded check (ttl < 1 -> ttl <= 1)
- Router should not reply with ttl exceeded for multicast flows
- Extra checks for icmp advice to avoid protocol confusions.
Diffstat (limited to 'sys/src/9/ip/tcp.c')
-rw-r--r-- | sys/src/9/ip/tcp.c | 174 |
1 files changed, 117 insertions, 57 deletions
diff --git a/sys/src/9/ip/tcp.c b/sys/src/9/ip/tcp.c index e4e6fa95b..d2a83a747 100644 --- a/sys/src/9/ip/tcp.c +++ b/sys/src/9/ip/tcp.c @@ -126,7 +126,7 @@ struct Tcp4hdr uchar length[2]; /* packet length */ uchar id[2]; /* Identification */ uchar frag[2]; /* Fragment information */ - uchar Unused; + uchar ttl; uchar proto; uchar tcplen[2]; uchar tcpsrc[4]; @@ -1814,9 +1814,7 @@ tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) } tcpsetstate(new, Established); - iphtadd(&tpriv->ht, new); - return new; } @@ -2068,10 +2066,11 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) Tcp seg; Tcp4hdr *h4; Tcp6hdr *h6; - int hdrlen; Tcpctl *tcb; - ushort length, csum; + int hdrlen; + ushort length; uchar source[IPaddrlen], dest[IPaddrlen]; + Iphash *iph; Conv *s; Fs *f; Tcppriv *tpriv; @@ -2087,21 +2086,32 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) h6 = (Tcp6hdr*)(bp->rp); if((h4->vihl&0xF0)==IP_VER4) { + int ttl = h4->ttl; + version = V4; length = nhgets(h4->length); + if(length < TCP4_PKT){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp len\n"); + freeblist(bp); + return; + } + length -= TCP4_PKT; v4tov6(dest, h4->tcpdst); v4tov6(source, h4->tcpsrc); - h4->Unused = 0; - hnputs(h4->tcplen, length-TCP4_PKT); - if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && - ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) { + h4->ttl = 0; + hnputs(h4->tcplen, length); + if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) + && ptclcsum(bp, TCP4_IPLEN, length + TCP4_PKT - TCP4_IPLEN)) { tpriv->stats[CsumErrs]++; tpriv->stats[InErrs]++; netlog(f, Logtcp, "bad tcp proto cksum\n"); freeblist(bp); return; } + h4->ttl = ttl; hdrlen = ntohtcp4(&seg, &bp); if(hdrlen < 0){ @@ -2110,16 +2120,8 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) netlog(f, Logtcp, "bad tcp hdr len\n"); return; } - - /* trim the packet to the size claimed by the datagram */ - length -= hdrlen+TCP4_PKT; - bp = trimblock(bp, hdrlen+TCP4_PKT, length); - if(bp 
== nil){ - tpriv->stats[LenErrs]++; - tpriv->stats[InErrs]++; - netlog(f, Logtcp, "tcp len < 0 after trim\n"); - return; - } + length -= hdrlen; + hdrlen += TCP4_PKT; } else { int ttl = h6->ttl; @@ -2133,13 +2135,13 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; h6->ttl = proto; hnputl(h6->vcf, length); - if((h6->tcpcksum[0] || h6->tcpcksum[1]) && - (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) { + if((h6->tcpcksum[0] || h6->tcpcksum[1]) + && ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE) != 0) { tpriv->stats[CsumErrs]++; tpriv->stats[InErrs]++; netlog(f, Logtcp, - "bad tcpv6 proto cksum: got %#ux, computed %#ux\n", - h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum); + "bad tcpv6 proto cksum: got %#ux\n", + h6->tcpcksum[0]<<8 | h6->tcpcksum[1]); freeblist(bp); return; } @@ -2154,24 +2156,16 @@ tcpiput(Proto *tcp, Ipifc*, Block *bp) netlog(f, Logtcp, "bad tcpv6 hdr len\n"); return; } - - /* trim the packet to the size claimed by the datagram */ length -= hdrlen; - bp = trimblock(bp, hdrlen+TCP6_PKT, length); - if(bp == nil){ - tpriv->stats[LenErrs]++; - tpriv->stats[InErrs]++; - netlog(f, Logtcp, "tcpv6 len < 0 after trim\n"); - return; - } + hdrlen += TCP6_PKT; } /* lock protocol while searching for a conversation */ qlock(tcp); /* Look for a matching conversation */ - s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); - if(s == nil){ + iph = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); + if(iph == nil){ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n", source, seg.source, dest, seg.dest); reset: @@ -2180,6 +2174,30 @@ reset: sndrst(tcp, source, dest, length, &seg, version, "no conversation", nil); return; } + if(iph->trans){ + Translation *q; + int hop = h4->ttl; + + if(hop <= 1 || (q = transbackward(tcp, iph)) == nil) + goto reset; + hnputs_csum(h4->tcpdst+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcpcksum); + hnputs_csum(h4->tcpdst+2, nhgets(q->forward.raddr+IPv4off+2), 
h4->tcpcksum); + hnputs_csum(h4->tcpdport, q->forward.rport, h4->tcpcksum); + qunlock(tcp); + ipoput4(f, bp, 1, hop - 1, h4->tos, q); + return; + } + s = iphconv(iph); + + /* trim off ip and tcp headers */ + bp = trimblock(bp, hdrlen, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp bad length after header trim off\n"); + qunlock(tcp); + return; + } /* if it's a listener, look for the right flags and get a new conv */ tcb = (Tcpctl*)s->ptcl; @@ -3200,11 +3218,12 @@ tcpadvise(Proto *tcp, Block *bp, char *msg) { Tcp4hdr *h4; Tcp6hdr *h6; - Tcpctl *tcb; uchar source[IPaddrlen]; uchar dest[IPaddrlen]; ushort psource, pdest; - Conv *s, **p; + Iphash *iph; + Tcpctl *tcb; + Conv *s; h4 = (Tcp4hdr*)(bp->rp); h6 = (Tcp6hdr*)(bp->rp); @@ -3221,31 +3240,71 @@ tcpadvise(Proto *tcp, Block *bp, char *msg) pdest = nhgets(h6->tcpdport); } - /* Look for a connection */ + /* Look for a connection (source/dest reversed; this is the original packet we sent) */ qlock(tcp); - for(p = tcp->conv; (s = *p) != nil; p++) { - tcb = (Tcpctl*)s->ptcl; - if(s->rport == pdest) - if(s->lport == psource) - if(tcb->state != Closed) - if(ipcmp(s->raddr, dest) == 0) - if(ipcmp(s->laddr, source) == 0){ - if(s->ignoreadvice) - break; - qlock(s); - qunlock(tcp); - switch(tcb->state){ - case Syn_sent: - localclose(s, msg); - break; - } - qunlock(s); - freeblist(bp); - return; - } + iph = iphtlook(&((Tcppriv*)tcp->priv)->ht, dest, pdest, source, psource); + if(iph == nil) + goto raise; + if(iph->trans){ + Translation *q; + + if((q = transbackward(tcp, iph)) == nil) + goto raise; + + /* h4->tcplen is the ip header checksum */ + hnputs_csum(h4->tcpsrc+0, nhgets(q->forward.raddr+IPv4off+0), h4->tcplen); + hnputs_csum(h4->tcpsrc+2, nhgets(q->forward.raddr+IPv4off+2), h4->tcplen); + + /* dont bother fixing tcp checksum, packet is most likely truncated */ + hnputs(h4->tcpsport, q->forward.rport); + qunlock(tcp); + + icmpproxyadvice(tcp->f, bp, h4->tcpsrc); + 
return; } + s = iphconv(iph); + if(s->ignoreadvice || s->state == Closed) + goto raise; + qlock(s); qunlock(tcp); + tcb = (Tcpctl*)s->ptcl; + if(tcb->state == Syn_sent) + localclose(s, msg); + qunlock(s); freeblist(bp); + return; +raise: + qunlock(tcp); + freeblist(bp); +} + +static Block* +tcpforward(Proto *tcp, Block *bp, Route *r) +{ + uchar da[IPaddrlen], sa[IPaddrlen]; + ushort dp, sp; + Tcp4hdr *h4; + Translation *q; + + h4 = (Tcp4hdr*)(bp->rp); + v4tov6(da, h4->tcpdst); + v4tov6(sa, h4->tcpsrc); + dp = nhgets(h4->tcpdport); + sp = nhgets(h4->tcpsport); + + qlock(tcp); + q = transforward(tcp, &((Tcppriv*)tcp->priv)->ht, sa, sp, da, dp, r); + if(q == nil){ + qunlock(tcp); + freeblist(bp); + return nil; + } + hnputs_csum(h4->tcpsrc+0, nhgets(q->backward.laddr+IPv4off+0), h4->tcpcksum); + hnputs_csum(h4->tcpsrc+2, nhgets(q->backward.laddr+IPv4off+2), h4->tcpcksum); + hnputs_csum(h4->tcpsport, q->backward.lport, h4->tcpcksum); + qunlock(tcp); + + return bp; } static char* @@ -3371,6 +3430,7 @@ tcpinit(Fs *fs) tcp->close = tcpclose; tcp->rcv = tcpiput; tcp->advise = tcpadvise; + tcp->forward = tcpforward; tcp->stats = tcpstats; tcp->inuse = tcpinuse; tcp->gc = tcpgc; |